aboutsummaryrefslogtreecommitdiff
path: root/src/Specific
diff options
context:
space:
mode:
authorGravatar Jason Gross <jgross@mit.edu>2017-11-02 02:20:52 -0400
committerGravatar Jason Gross <jgross@mit.edu>2017-11-02 02:20:52 -0400
commit37a8b2a0c14082298d77f0a4858ab3402d9d1c6c (patch)
treebd1b4c8c34485332ff15d58272efb38468ea72e5 /src/Specific
parent11144d2e698c4a263e59acb02226d383865f74e0 (diff)
Update display logs and c files
Diffstat (limited to 'src/Specific')
-rw-r--r--src/Specific/montgomery32_2e127m1/feadd.c66
-rw-r--r--src/Specific/montgomery32_2e127m1/femul.c262
-rw-r--r--src/Specific/montgomery32_2e127m1/fenz.c36
-rw-r--r--src/Specific/montgomery32_2e127m1/feopp.c62
-rw-r--r--src/Specific/montgomery32_2e127m1/fesub.c66
-rw-r--r--src/Specific/montgomery32_2e129m25/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e129m25/femul.c372
-rw-r--r--src/Specific/montgomery32_2e129m25/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e129m25/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e129m25/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e130m5/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e130m5/femul.c231
-rw-r--r--src/Specific/montgomery32_2e130m5/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e130m5/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e130m5/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e137m13/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e137m13/femul.c392
-rw-r--r--src/Specific/montgomery32_2e137m13/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e137m13/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e137m13/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e140m27/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e140m27/femul.c392
-rw-r--r--src/Specific/montgomery32_2e140m27/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e140m27/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e140m27/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e141m9/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e141m9/femul.c392
-rw-r--r--src/Specific/montgomery32_2e141m9/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e141m9/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e141m9/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e150m3/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e150m3/femul.c392
-rw-r--r--src/Specific/montgomery32_2e150m3/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e150m3/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e150m3/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e150m5/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e150m5/femul.c392
-rw-r--r--src/Specific/montgomery32_2e150m5/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e150m5/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e150m5/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e152m17/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e152m17/femul.c392
-rw-r--r--src/Specific/montgomery32_2e152m17/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e152m17/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e152m17/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e158m15/feadd.c76
-rw-r--r--src/Specific/montgomery32_2e158m15/femul.c392
-rw-r--r--src/Specific/montgomery32_2e158m15/fenz.c39
-rw-r--r--src/Specific/montgomery32_2e158m15/feopp.c71
-rw-r--r--src/Specific/montgomery32_2e158m15/fesub.c76
-rw-r--r--src/Specific/montgomery32_2e165m25/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e165m25/femul.c308
-rw-r--r--src/Specific/montgomery32_2e165m25/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e165m25/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e165m25/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e166m5/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e166m5/femul.c308
-rw-r--r--src/Specific/montgomery32_2e166m5/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e166m5/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e166m5/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e171m19/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e171m19/femul.c538
-rw-r--r--src/Specific/montgomery32_2e171m19/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e171m19/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e171m19/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e174m17/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e174m17/femul.c538
-rw-r--r--src/Specific/montgomery32_2e174m17/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e174m17/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e174m17/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e174m3/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e174m3/femul.c538
-rw-r--r--src/Specific/montgomery32_2e174m3/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e174m3/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e174m3/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e189m25/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e189m25/femul.c538
-rw-r--r--src/Specific/montgomery32_2e189m25/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e189m25/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e189m25/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e190m11/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e190m11/femul.c538
-rw-r--r--src/Specific/montgomery32_2e190m11/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e190m11/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e190m11/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e191m19/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e191m19/femul.c538
-rw-r--r--src/Specific/montgomery32_2e191m19/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e191m19/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e191m19/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/femul.c526
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/fenz.c42
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/feopp.c80
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/fesub.c86
-rw-r--r--src/Specific/montgomery32_2e194m33/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e194m33/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e196m15/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e196m15/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e198m17/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e198m17/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e205m45x2e198m1/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e205m45x2e198m1/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e206m5/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e206m5/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e212m29/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e212m29/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e213m3/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e213m3/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e216m2e108m1/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e216m2e108m1/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e221m3/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e221m3/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e222m117/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e222m117/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/feadd.c96
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/femul.c652
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/fenz.c45
-rw-r--r--src/Specific/montgomery32_2e226m5/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e226m5/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e230m27/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e230m27/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e235m15/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e235m15/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e243m9/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e243m9/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e251m9/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e251m9/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e254m127x2e240m1/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e254m127x2e240m1/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e255m19/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e255m19/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e255m765/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e255m765/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e256m189/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e256m189/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e256m2e32m977/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e256m2e32m977/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e256m88x2e240m1/feadd.c106
-rw-r--r--src/Specific/montgomery32_2e256m88x2e240m1/fenz.c48
-rw-r--r--src/Specific/montgomery32_2e266m3/feadd.c116
-rw-r--r--src/Specific/montgomery32_2e266m3/fenz.c51
-rw-r--r--src/Specific/montgomery32_2e285m9/feadd.c116
-rw-r--r--src/Specific/montgomery32_2e285m9/fenz.c51
-rw-r--r--src/Specific/montgomery32_2e291m19/feadd.c126
-rw-r--r--src/Specific/montgomery32_2e291m19/fenz.c54
-rw-r--r--src/Specific/montgomery32_2e321m9/feadd.c136
-rw-r--r--src/Specific/montgomery32_2e321m9/fenz.c57
-rw-r--r--src/Specific/montgomery32_2e322m2e161m1/feadd.c136
-rw-r--r--src/Specific/montgomery32_2e322m2e161m1/fenz.c57
-rw-r--r--src/Specific/montgomery32_2e336m17/feadd.c136
-rw-r--r--src/Specific/montgomery32_2e336m17/fenz.c57
-rw-r--r--src/Specific/montgomery32_2e336m3/feadd.c136
-rw-r--r--src/Specific/montgomery32_2e336m3/fenz.c57
-rw-r--r--src/Specific/montgomery32_2e338m15/feadd.c136
-rw-r--r--src/Specific/montgomery32_2e338m15/fenz.c57
-rw-r--r--src/Specific/montgomery32_2e369m25/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e369m25/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e379m19/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e379m19/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e382m105/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e382m105/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e383m187/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e383m187/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e383m31/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e383m31/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e383m421/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e383m421/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e384m317/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e384m317/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e384m5x2e368m1/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e384m5x2e368m1/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e384m79x2e376m1/feadd.c146
-rw-r--r--src/Specific/montgomery32_2e384m79x2e376m1/fenz.c60
-rw-r--r--src/Specific/montgomery32_2e389m21/feadd.c156
-rw-r--r--src/Specific/montgomery32_2e389m21/fenz.c63
-rw-r--r--src/Specific/montgomery32_2e401m31/feadd.c156
-rw-r--r--src/Specific/montgomery32_2e401m31/fenz.c63
-rw-r--r--src/Specific/montgomery32_2e413m21/feadd.c156
-rw-r--r--src/Specific/montgomery32_2e413m21/fenz.c63
-rw-r--r--src/Specific/montgomery32_2e414m17/feadd.c156
-rw-r--r--src/Specific/montgomery32_2e414m17/fenz.c63
-rw-r--r--src/Specific/montgomery32_2e416m2e208m1/feadd.c156
-rw-r--r--src/Specific/montgomery32_2e416m2e208m1/fenz.c63
-rw-r--r--src/Specific/montgomery32_2e444m17/feadd.c166
-rw-r--r--src/Specific/montgomery32_2e444m17/fenz.c66
-rw-r--r--src/Specific/montgomery32_2e448m2e224m1/feadd.c166
-rw-r--r--src/Specific/montgomery32_2e448m2e224m1/fenz.c66
-rw-r--r--src/Specific/montgomery32_2e450m2e225m1/feadd.c176
-rw-r--r--src/Specific/montgomery32_2e450m2e225m1/fenz.c69
-rw-r--r--src/Specific/montgomery32_2e452m3/feadd.c176
-rw-r--r--src/Specific/montgomery32_2e452m3/fenz.c69
-rw-r--r--src/Specific/montgomery32_2e468m17/feadd.c176
-rw-r--r--src/Specific/montgomery32_2e468m17/fenz.c69
-rw-r--r--src/Specific/montgomery32_2e480m2e240m1/feadd.c176
-rw-r--r--src/Specific/montgomery32_2e480m2e240m1/fenz.c69
-rw-r--r--src/Specific/montgomery32_2e488m17/feadd.c186
-rw-r--r--src/Specific/montgomery32_2e488m17/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e489m21/feadd.c186
-rw-r--r--src/Specific/montgomery32_2e489m21/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e495m31/feadd.c186
-rw-r--r--src/Specific/montgomery32_2e495m31/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e510m290x2e496m1/feadd.c100
-rw-r--r--src/Specific/montgomery32_2e510m290x2e496m1/feaddDisplay.log56
-rw-r--r--src/Specific/montgomery32_2e510m290x2e496m1/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e511m187/feadd.c186
-rw-r--r--src/Specific/montgomery32_2e511m187/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e511m481/feadd.c186
-rw-r--r--src/Specific/montgomery32_2e511m481/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e512m491x2e496m1/feadd.c100
-rw-r--r--src/Specific/montgomery32_2e512m491x2e496m1/feaddDisplay.log2
-rw-r--r--src/Specific/montgomery32_2e512m491x2e496m1/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e512m569/feadd.c186
-rw-r--r--src/Specific/montgomery32_2e512m569/fenz.c72
-rw-r--r--src/Specific/montgomery32_2e521m1/fenz.c75
-rw-r--r--src/Specific/montgomery64_2e127m1/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e127m1/femul.c94
-rw-r--r--src/Specific/montgomery64_2e127m1/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e127m1/feopp.c44
-rw-r--r--src/Specific/montgomery64_2e127m1/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e129m25/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e129m25/femul.c160
-rw-r--r--src/Specific/montgomery64_2e129m25/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e129m25/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e129m25/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e130m5/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e130m5/femul.c113
-rw-r--r--src/Specific/montgomery64_2e130m5/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e130m5/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e130m5/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e137m13/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e137m13/femul.c172
-rw-r--r--src/Specific/montgomery64_2e137m13/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e137m13/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e137m13/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e140m27/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e140m27/femul.c172
-rw-r--r--src/Specific/montgomery64_2e140m27/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e140m27/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e140m27/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e141m9/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e141m9/femul.c172
-rw-r--r--src/Specific/montgomery64_2e141m9/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e141m9/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e141m9/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e150m3/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e150m3/femul.c172
-rw-r--r--src/Specific/montgomery64_2e150m3/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e150m3/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e150m3/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e150m5/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e150m5/femul.c172
-rw-r--r--src/Specific/montgomery64_2e150m5/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e150m5/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e150m5/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e152m17/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e152m17/femul.c172
-rw-r--r--src/Specific/montgomery64_2e152m17/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e152m17/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e152m17/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e158m15/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e158m15/femul.c172
-rw-r--r--src/Specific/montgomery64_2e158m15/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e158m15/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e158m15/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e165m25/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e165m25/femul.c172
-rw-r--r--src/Specific/montgomery64_2e165m25/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e165m25/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e165m25/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e166m5/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e166m5/femul.c172
-rw-r--r--src/Specific/montgomery64_2e166m5/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e166m5/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e166m5/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e171m19/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e171m19/femul.c172
-rw-r--r--src/Specific/montgomery64_2e171m19/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e171m19/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e171m19/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e174m17/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e174m17/femul.c172
-rw-r--r--src/Specific/montgomery64_2e174m17/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e174m17/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e174m17/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e174m3/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e174m3/femul.c172
-rw-r--r--src/Specific/montgomery64_2e174m3/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e174m3/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e174m3/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e189m25/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e189m25/femul.c172
-rw-r--r--src/Specific/montgomery64_2e189m25/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e189m25/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e189m25/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e190m11/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e190m11/femul.c172
-rw-r--r--src/Specific/montgomery64_2e190m11/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e190m11/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e190m11/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e191m19/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e191m19/femul.c172
-rw-r--r--src/Specific/montgomery64_2e191m19/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e191m19/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e191m19/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/feadd.c56
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/femul.c166
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/fenz.c33
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/feopp.c53
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/fesub.c56
-rw-r--r--src/Specific/montgomery64_2e194m33/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e194m33/femul.c166
-rw-r--r--src/Specific/montgomery64_2e194m33/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e194m33/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e194m33/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e196m15/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e196m15/femul.c166
-rw-r--r--src/Specific/montgomery64_2e196m15/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e196m15/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e196m15/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e198m17/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e198m17/femul.c166
-rw-r--r--src/Specific/montgomery64_2e198m17/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e198m17/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e198m17/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/femul.c262
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e206m5/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e206m5/femul.c270
-rw-r--r--src/Specific/montgomery64_2e206m5/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e206m5/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e206m5/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e212m29/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e212m29/femul.c270
-rw-r--r--src/Specific/montgomery64_2e212m29/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e212m29/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e212m29/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e213m3/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e213m3/femul.c270
-rw-r--r--src/Specific/montgomery64_2e213m3/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e213m3/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e213m3/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/femul.c262
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e221m3/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e221m3/femul.c270
-rw-r--r--src/Specific/montgomery64_2e221m3/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e221m3/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e221m3/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e222m117/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e222m117/femul.c270
-rw-r--r--src/Specific/montgomery64_2e222m117/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e222m117/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e222m117/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/femul.c254
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e226m5/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e226m5/femul.c270
-rw-r--r--src/Specific/montgomery64_2e226m5/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e226m5/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e226m5/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e230m27/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e230m27/femul.c270
-rw-r--r--src/Specific/montgomery64_2e230m27/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e230m27/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e230m27/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e235m15/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e235m15/femul.c270
-rw-r--r--src/Specific/montgomery64_2e235m15/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e235m15/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e235m15/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e243m9/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e243m9/femul.c270
-rw-r--r--src/Specific/montgomery64_2e243m9/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e243m9/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e243m9/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e251m9/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e251m9/femul.c270
-rw-r--r--src/Specific/montgomery64_2e251m9/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e251m9/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e251m9/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/femul.c262
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e255m19/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e255m19/femul.c270
-rw-r--r--src/Specific/montgomery64_2e255m19/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e255m19/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e255m19/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c270
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e255m765/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e255m765/femul.c270
-rw-r--r--src/Specific/montgomery64_2e255m765/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e255m765/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e255m765/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e256m189/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e256m189/femul.c270
-rw-r--r--src/Specific/montgomery64_2e256m189/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e256m189/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e256m189/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c254
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c60
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c64
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/femul.c270
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/feadd.c66
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/femul.c262
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/fenz.c36
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/feopp.c62
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/fesub.c66
-rw-r--r--src/Specific/montgomery64_2e266m3/feadd.c76
-rw-r--r--src/Specific/montgomery64_2e266m3/femul.c392
-rw-r--r--src/Specific/montgomery64_2e266m3/fenz.c39
-rw-r--r--src/Specific/montgomery64_2e266m3/feopp.c71
-rw-r--r--src/Specific/montgomery64_2e266m3/fesub.c76
-rw-r--r--src/Specific/montgomery64_2e285m9/feadd.c76
-rw-r--r--src/Specific/montgomery64_2e285m9/femul.c392
-rw-r--r--src/Specific/montgomery64_2e285m9/fenz.c39
-rw-r--r--src/Specific/montgomery64_2e285m9/feopp.c71
-rw-r--r--src/Specific/montgomery64_2e285m9/fesub.c76
-rw-r--r--src/Specific/montgomery64_2e291m19/feadd.c76
-rw-r--r--src/Specific/montgomery64_2e291m19/femul.c392
-rw-r--r--src/Specific/montgomery64_2e291m19/fenz.c39
-rw-r--r--src/Specific/montgomery64_2e291m19/feopp.c71
-rw-r--r--src/Specific/montgomery64_2e291m19/fesub.c76
-rw-r--r--src/Specific/montgomery64_2e321m9/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e321m9/femul.c514
-rw-r--r--src/Specific/montgomery64_2e321m9/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e321m9/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e321m9/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/femul.c301
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e336m17/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e336m17/femul.c538
-rw-r--r--src/Specific/montgomery64_2e336m17/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e336m17/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e336m17/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e336m3/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e336m3/femul.c538
-rw-r--r--src/Specific/montgomery64_2e336m3/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e336m3/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e336m3/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e338m15/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e338m15/femul.c538
-rw-r--r--src/Specific/montgomery64_2e338m15/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e338m15/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e338m15/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e369m25/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e369m25/femul.c538
-rw-r--r--src/Specific/montgomery64_2e369m25/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e369m25/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e369m25/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e379m19/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e379m19/femul.c538
-rw-r--r--src/Specific/montgomery64_2e379m19/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e379m19/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e379m19/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e382m105/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e382m105/femul.c538
-rw-r--r--src/Specific/montgomery64_2e382m105/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e382m105/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e382m105/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e383m187/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e383m187/femul.c538
-rw-r--r--src/Specific/montgomery64_2e383m187/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e383m187/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e383m187/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e383m31/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e383m31/femul.c538
-rw-r--r--src/Specific/montgomery64_2e383m31/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e383m31/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e383m31/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e383m421/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e383m421/femul.c538
-rw-r--r--src/Specific/montgomery64_2e383m421/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e383m421/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e383m421/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c538
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e384m317/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e384m317/femul.c538
-rw-r--r--src/Specific/montgomery64_2e384m317/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e384m317/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e384m317/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/femul.c526
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/feadd.c86
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/femul.c526
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/fenz.c42
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/feopp.c80
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/fesub.c86
-rw-r--r--src/Specific/montgomery64_2e389m21/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e389m21/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e401m31/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e401m31/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e413m21/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e413m21/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e414m17/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e414m17/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e416m2e208m1/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e416m2e208m1/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e444m17/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e444m17/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e448m2e224m1/feadd.c96
-rw-r--r--src/Specific/montgomery64_2e448m2e224m1/fenz.c45
-rw-r--r--src/Specific/montgomery64_2e450m2e225m1/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e450m2e225m1/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e452m3/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e452m3/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e468m17/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e468m17/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e480m2e240m1/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e480m2e240m1/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e488m17/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e488m17/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e489m21/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e489m21/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e495m31/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e495m31/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e510m290x2e496m1/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e510m290x2e496m1/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e511m187/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e511m187/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e511m481/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e511m481/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e512m491x2e496m1/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e512m491x2e496m1/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e512m569/feadd.c106
-rw-r--r--src/Specific/montgomery64_2e512m569/fenz.c48
-rw-r--r--src/Specific/montgomery64_2e521m1/feadd.c116
-rw-r--r--src/Specific/montgomery64_2e521m1/fenz.c51
-rw-r--r--src/Specific/solinas32_2e127m1/femul.c106
-rw-r--r--src/Specific/solinas32_2e127m1/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e127m1/freeze.c57
-rw-r--r--src/Specific/solinas32_2e129m25/femul.c106
-rw-r--r--src/Specific/solinas32_2e129m25/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e129m25/freeze.c57
-rw-r--r--src/Specific/solinas32_2e130m5/femul.c94
-rw-r--r--src/Specific/solinas32_2e130m5/fesquare.c89
-rw-r--r--src/Specific/solinas32_2e130m5/freeze.c52
-rw-r--r--src/Specific/solinas32_2e137m13/femul.c130
-rw-r--r--src/Specific/solinas32_2e137m13/fesquare.c122
-rw-r--r--src/Specific/solinas32_2e137m13/freeze.c67
-rw-r--r--src/Specific/solinas32_2e140m27/femul.c106
-rw-r--r--src/Specific/solinas32_2e140m27/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e140m27/freeze.c57
-rw-r--r--src/Specific/solinas32_2e141m9/femul.c106
-rw-r--r--src/Specific/solinas32_2e141m9/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e141m9/freeze.c57
-rw-r--r--src/Specific/solinas32_2e150m3/femul.c94
-rw-r--r--src/Specific/solinas32_2e150m3/fesquare.c89
-rw-r--r--src/Specific/solinas32_2e150m3/freeze.c52
-rw-r--r--src/Specific/solinas32_2e150m5/femul.c106
-rw-r--r--src/Specific/solinas32_2e150m5/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e150m5/freeze.c57
-rw-r--r--src/Specific/solinas32_2e152m17/femul.c106
-rw-r--r--src/Specific/solinas32_2e152m17/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e152m17/freeze.c57
-rw-r--r--src/Specific/solinas32_2e158m15/femul.c106
-rw-r--r--src/Specific/solinas32_2e158m15/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e158m15/freeze.c57
-rw-r--r--src/Specific/solinas32_2e165m25/femul.c142
-rw-r--r--src/Specific/solinas32_2e165m25/fesquare.c133
-rw-r--r--src/Specific/solinas32_2e165m25/freeze.c72
-rw-r--r--src/Specific/solinas32_2e166m5/femul.c166
-rw-r--r--src/Specific/solinas32_2e166m5/fesquare.c155
-rw-r--r--src/Specific/solinas32_2e166m5/freeze.c82
-rw-r--r--src/Specific/solinas32_2e171m19/femul.c142
-rw-r--r--src/Specific/solinas32_2e171m19/fesquare.c133
-rw-r--r--src/Specific/solinas32_2e171m19/freeze.c72
-rw-r--r--src/Specific/solinas32_2e174m17/femul.c142
-rw-r--r--src/Specific/solinas32_2e174m17/fesquare.c133
-rw-r--r--src/Specific/solinas32_2e174m17/freeze.c72
-rw-r--r--src/Specific/solinas32_2e174m3/femul.c106
-rw-r--r--src/Specific/solinas32_2e174m3/fesquare.c100
-rw-r--r--src/Specific/solinas32_2e174m3/freeze.c57
-rw-r--r--src/Specific/solinas32_2e189m25/femul.c118
-rw-r--r--src/Specific/solinas32_2e189m25/fesquare.c111
-rw-r--r--src/Specific/solinas32_2e189m25/freeze.c62
-rw-r--r--src/Specific/solinas32_2e190m11/femul.c118
-rw-r--r--src/Specific/solinas32_2e190m11/fesquare.c111
-rw-r--r--src/Specific/solinas32_2e190m11/freeze.c62
-rw-r--r--src/Specific/solinas32_2e191m19/femul.c154
-rw-r--r--src/Specific/solinas32_2e191m19/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e191m19/freeze.c77
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/femul.c152
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/freeze.c67
-rw-r--r--src/Specific/solinas32_2e194m33/femul.c130
-rw-r--r--src/Specific/solinas32_2e194m33/fesquare.c122
-rw-r--r--src/Specific/solinas32_2e194m33/freeze.c67
-rw-r--r--src/Specific/solinas32_2e196m15/femul.c118
-rw-r--r--src/Specific/solinas32_2e196m15/fesquare.c111
-rw-r--r--src/Specific/solinas32_2e196m15/freeze.c62
-rw-r--r--src/Specific/solinas32_2e198m17/femul.c142
-rw-r--r--src/Specific/solinas32_2e198m17/fesquare.c133
-rw-r--r--src/Specific/solinas32_2e198m17/freeze.c72
-rw-r--r--src/Specific/solinas32_2e205m45x2e198m1/freeze.c77
-rw-r--r--src/Specific/solinas32_2e206m5/femul.c178
-rw-r--r--src/Specific/solinas32_2e206m5/fesquare.c166
-rw-r--r--src/Specific/solinas32_2e206m5/freeze.c87
-rw-r--r--src/Specific/solinas32_2e212m29/femul.c130
-rw-r--r--src/Specific/solinas32_2e212m29/fesquare.c122
-rw-r--r--src/Specific/solinas32_2e212m29/freeze.c67
-rw-r--r--src/Specific/solinas32_2e213m3/femul.c214
-rw-r--r--src/Specific/solinas32_2e213m3/fesquare.c199
-rw-r--r--src/Specific/solinas32_2e213m3/freeze.c102
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/femul.c164
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/fesquare.c156
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/freeze.c67
-rw-r--r--src/Specific/solinas32_2e221m3/femul.c154
-rw-r--r--src/Specific/solinas32_2e221m3/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e221m3/freeze.c77
-rw-r--r--src/Specific/solinas32_2e222m117/femul.c154
-rw-r--r--src/Specific/solinas32_2e222m117/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e222m117/freeze.c77
-rw-r--r--src/Specific/solinas32_2e224m2e96p1/freeze.c65
-rw-r--r--src/Specific/solinas32_2e226m5/femul.c130
-rw-r--r--src/Specific/solinas32_2e226m5/fesquare.c122
-rw-r--r--src/Specific/solinas32_2e226m5/freeze.c67
-rw-r--r--src/Specific/solinas32_2e230m27/femul.c154
-rw-r--r--src/Specific/solinas32_2e230m27/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e230m27/freeze.c77
-rw-r--r--src/Specific/solinas32_2e235m15/femul.c142
-rw-r--r--src/Specific/solinas32_2e235m15/fesquare.c133
-rw-r--r--src/Specific/solinas32_2e235m15/freeze.c72
-rw-r--r--src/Specific/solinas32_2e243m9/femul.c142
-rw-r--r--src/Specific/solinas32_2e243m9/fesquare.c133
-rw-r--r--src/Specific/solinas32_2e243m9/freeze.c72
-rw-r--r--src/Specific/solinas32_2e251m9/femul.c154
-rw-r--r--src/Specific/solinas32_2e251m9/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e251m9/freeze.c77
-rw-r--r--src/Specific/solinas32_2e254m127x2e240m1/freeze.c82
-rw-r--r--src/Specific/solinas32_2e255m19/femul.c154
-rw-r--r--src/Specific/solinas32_2e255m19/fesquare.c144
-rw-r--r--src/Specific/solinas32_2e255m19/freeze.c77
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/femul.c168
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c159
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c72
-rw-r--r--src/Specific/solinas32_2e255m765/femul.c178
-rw-r--r--src/Specific/solinas32_2e255m765/fesquare.c166
-rw-r--r--src/Specific/solinas32_2e255m765/freeze.c87
-rw-r--r--src/Specific/solinas32_2e256m189/femul.c178
-rw-r--r--src/Specific/solinas32_2e256m189/fesquare.c166
-rw-r--r--src/Specific/solinas32_2e256m189/freeze.c87
-rw-r--r--src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c83
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/femul.c200
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/fesquare.c188
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/freeze.c87
-rw-r--r--src/Specific/solinas32_2e256m88x2e240m1/freeze.c87
-rw-r--r--src/Specific/solinas32_2e266m3/femul.c178
-rw-r--r--src/Specific/solinas32_2e266m3/fesquare.c166
-rw-r--r--src/Specific/solinas32_2e266m3/freeze.c87
-rw-r--r--src/Specific/solinas32_2e285m9/femul.c214
-rw-r--r--src/Specific/solinas32_2e285m9/fesquare.c199
-rw-r--r--src/Specific/solinas32_2e285m9/freeze.c102
-rw-r--r--src/Specific/solinas32_2e291m19/femul.c178
-rw-r--r--src/Specific/solinas32_2e291m19/fesquare.c166
-rw-r--r--src/Specific/solinas32_2e291m19/freeze.c87
-rw-r--r--src/Specific/solinas32_2e321m9/femul.c226
-rw-r--r--src/Specific/solinas32_2e321m9/fesquare.c210
-rw-r--r--src/Specific/solinas32_2e321m9/freeze.c107
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/femul.c248
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/fesquare.c234
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/freeze.c97
-rw-r--r--src/Specific/solinas32_2e336m17/femul.c202
-rw-r--r--src/Specific/solinas32_2e336m17/fesquare.c188
-rw-r--r--src/Specific/solinas32_2e336m17/freeze.c97
-rw-r--r--src/Specific/solinas32_2e336m3/femul.c178
-rw-r--r--src/Specific/solinas32_2e336m3/fesquare.c166
-rw-r--r--src/Specific/solinas32_2e336m3/freeze.c87
-rw-r--r--src/Specific/solinas32_2e338m15/femul.c190
-rw-r--r--src/Specific/solinas32_2e338m15/fesquare.c177
-rw-r--r--src/Specific/solinas32_2e338m15/freeze.c92
-rw-r--r--src/Specific/solinas32_2e369m25/femul.c226
-rw-r--r--src/Specific/solinas32_2e369m25/fesquare.c210
-rw-r--r--src/Specific/solinas32_2e369m25/freeze.c107
-rw-r--r--src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c105
-rw-r--r--src/Specific/solinas32_2e384m5x2e368m1/freeze.c107
-rw-r--r--src/Specific/solinas32_2e384m79x2e376m1/freeze.c107
-rw-r--r--src/Specific/solinas32_2e401m31/femul.c226
-rw-r--r--src/Specific/solinas32_2e401m31/fesquare.c210
-rw-r--r--src/Specific/solinas32_2e401m31/freeze.c107
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/femul.c276
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/fesquare.c260
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/freeze.c107
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/femul.c276
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/fesquare.c260
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/freeze.c107
-rw-r--r--src/Specific/solinas32_2e450m2e225m1/freeze.c102
-rw-r--r--src/Specific/solinas32_2e452m3/femul.c226
-rw-r--r--src/Specific/solinas32_2e452m3/fesquare.c210
-rw-r--r--src/Specific/solinas32_2e452m3/freeze.c107
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/femul.c276
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/fesquare.c260
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/freeze.c107
-rw-r--r--src/Specific/solinas32_2e511m187/freeze.c144
-rw-r--r--src/Specific/solinas32_2e511m187/freezeDisplay.log58
-rw-r--r--src/Specific/solinas64_2e127m1/femul.c70
-rw-r--r--src/Specific/solinas64_2e127m1/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e127m1/freeze.c42
-rw-r--r--src/Specific/solinas64_2e129m25/femul.c70
-rw-r--r--src/Specific/solinas64_2e129m25/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e129m25/freeze.c42
-rw-r--r--src/Specific/solinas64_2e130m5/femul.c70
-rw-r--r--src/Specific/solinas64_2e130m5/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e130m5/freeze.c42
-rw-r--r--src/Specific/solinas64_2e137m13/femul.c82
-rw-r--r--src/Specific/solinas64_2e137m13/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e137m13/freeze.c47
-rw-r--r--src/Specific/solinas64_2e140m27/femul.c82
-rw-r--r--src/Specific/solinas64_2e140m27/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e140m27/freeze.c47
-rw-r--r--src/Specific/solinas64_2e141m9/femul.c70
-rw-r--r--src/Specific/solinas64_2e141m9/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e141m9/freeze.c42
-rw-r--r--src/Specific/solinas64_2e150m3/femul.c70
-rw-r--r--src/Specific/solinas64_2e150m3/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e150m3/freeze.c42
-rw-r--r--src/Specific/solinas64_2e150m5/femul.c70
-rw-r--r--src/Specific/solinas64_2e150m5/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e150m5/freeze.c42
-rw-r--r--src/Specific/solinas64_2e152m17/femul.c82
-rw-r--r--src/Specific/solinas64_2e152m17/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e152m17/freeze.c47
-rw-r--r--src/Specific/solinas64_2e158m15/femul.c82
-rw-r--r--src/Specific/solinas64_2e158m15/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e158m15/freeze.c47
-rw-r--r--src/Specific/solinas64_2e165m25/femul.c70
-rw-r--r--src/Specific/solinas64_2e165m25/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e165m25/freeze.c42
-rw-r--r--src/Specific/solinas64_2e166m5/femul.c70
-rw-r--r--src/Specific/solinas64_2e166m5/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e166m5/freeze.c42
-rw-r--r--src/Specific/solinas64_2e171m19/femul.c70
-rw-r--r--src/Specific/solinas64_2e171m19/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e171m19/freeze.c42
-rw-r--r--src/Specific/solinas64_2e174m17/femul.c70
-rw-r--r--src/Specific/solinas64_2e174m17/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e174m17/freeze.c42
-rw-r--r--src/Specific/solinas64_2e174m3/femul.c70
-rw-r--r--src/Specific/solinas64_2e174m3/fesquare.c67
-rw-r--r--src/Specific/solinas64_2e174m3/freeze.c42
-rw-r--r--src/Specific/solinas64_2e189m25/femul.c82
-rw-r--r--src/Specific/solinas64_2e189m25/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e189m25/freeze.c47
-rw-r--r--src/Specific/solinas64_2e190m11/femul.c82
-rw-r--r--src/Specific/solinas64_2e190m11/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e190m11/freeze.c47
-rw-r--r--src/Specific/solinas64_2e191m19/femul.c94
-rw-r--r--src/Specific/solinas64_2e191m19/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e191m19/freeze.c52
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/femul.c104
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/fesquare.c100
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/freeze.c47
-rw-r--r--src/Specific/solinas64_2e194m33/femul.c82
-rw-r--r--src/Specific/solinas64_2e194m33/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e194m33/freeze.c47
-rw-r--r--src/Specific/solinas64_2e196m15/femul.c82
-rw-r--r--src/Specific/solinas64_2e196m15/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e196m15/freeze.c47
-rw-r--r--src/Specific/solinas64_2e198m17/femul.c82
-rw-r--r--src/Specific/solinas64_2e198m17/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e198m17/freeze.c47
-rw-r--r--src/Specific/solinas64_2e205m45x2e198m1/freeze.c47
-rw-r--r--src/Specific/solinas64_2e206m5/femul.c82
-rw-r--r--src/Specific/solinas64_2e206m5/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e206m5/freeze.c47
-rw-r--r--src/Specific/solinas64_2e212m29/femul.c82
-rw-r--r--src/Specific/solinas64_2e212m29/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e212m29/freeze.c47
-rw-r--r--src/Specific/solinas64_2e213m3/femul.c82
-rw-r--r--src/Specific/solinas64_2e213m3/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e213m3/freeze.c47
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/femul.c108
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/fesquare.c104
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/freeze.c47
-rw-r--r--src/Specific/solinas64_2e221m3/femul.c82
-rw-r--r--src/Specific/solinas64_2e221m3/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e221m3/freeze.c47
-rw-r--r--src/Specific/solinas64_2e222m117/femul.c82
-rw-r--r--src/Specific/solinas64_2e222m117/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e222m117/freeze.c47
-rw-r--r--src/Specific/solinas64_2e224m2e96p1/freeze.c47
-rw-r--r--src/Specific/solinas64_2e226m5/femul.c82
-rw-r--r--src/Specific/solinas64_2e226m5/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e226m5/freeze.c47
-rw-r--r--src/Specific/solinas64_2e230m27/femul.c82
-rw-r--r--src/Specific/solinas64_2e230m27/fesquare.c78
-rw-r--r--src/Specific/solinas64_2e230m27/freeze.c47
-rw-r--r--src/Specific/solinas64_2e235m15/femul.c94
-rw-r--r--src/Specific/solinas64_2e235m15/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e235m15/freeze.c52
-rw-r--r--src/Specific/solinas64_2e243m9/femul.c106
-rw-r--r--src/Specific/solinas64_2e243m9/fesquare.c100
-rw-r--r--src/Specific/solinas64_2e243m9/freeze.c57
-rw-r--r--src/Specific/solinas64_2e251m9/femul.c94
-rw-r--r--src/Specific/solinas64_2e251m9/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e251m9/freeze.c53
-rw-r--r--src/Specific/solinas64_2e254m127x2e240m1/freeze.c57
-rw-r--r--src/Specific/solinas64_2e255m19/femul.c94
-rw-r--r--src/Specific/solinas64_2e255m19/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e255m19/freeze.c71
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/femul.c120
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c115
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c71
-rw-r--r--src/Specific/solinas64_2e255m765/femul.c94
-rw-r--r--src/Specific/solinas64_2e255m765/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e255m765/freeze.c71
-rw-r--r--src/Specific/solinas64_2e256m189/femul.c94
-rw-r--r--src/Specific/solinas64_2e256m189/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e256m189/freeze.c52
-rw-r--r--src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c51
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/femul.c110
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/fesquare.c105
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/freeze.c52
-rw-r--r--src/Specific/solinas64_2e256m88x2e240m1/freeze.c52
-rw-r--r--src/Specific/solinas64_2e266m3/femul.c94
-rw-r--r--src/Specific/solinas64_2e266m3/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e266m3/freeze.c52
-rw-r--r--src/Specific/solinas64_2e285m9/femul.c94
-rw-r--r--src/Specific/solinas64_2e285m9/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e285m9/freeze.c52
-rw-r--r--src/Specific/solinas64_2e291m19/femul.c94
-rw-r--r--src/Specific/solinas64_2e291m19/fesquare.c89
-rw-r--r--src/Specific/solinas64_2e291m19/freeze.c52
-rw-r--r--src/Specific/solinas64_2e321m9/femul.c106
-rw-r--r--src/Specific/solinas64_2e321m9/fesquare.c100
-rw-r--r--src/Specific/solinas64_2e321m9/freeze.c57
-rw-r--r--src/Specific/solinas64_2e322m2e161m1/freeze.c62
-rw-r--r--src/Specific/solinas64_2e336m17/femul.c106
-rw-r--r--src/Specific/solinas64_2e336m17/fesquare.c100
-rw-r--r--src/Specific/solinas64_2e336m17/freeze.c57
-rw-r--r--src/Specific/solinas64_2e336m3/femul.c106
-rw-r--r--src/Specific/solinas64_2e336m3/fesquare.c100
-rw-r--r--src/Specific/solinas64_2e336m3/freeze.c57
-rw-r--r--src/Specific/solinas64_2e338m15/femul.c106
-rw-r--r--src/Specific/solinas64_2e338m15/fesquare.c100
-rw-r--r--src/Specific/solinas64_2e338m15/freeze.c57
-rw-r--r--src/Specific/solinas64_2e369m25/femul.c130
-rw-r--r--src/Specific/solinas64_2e369m25/fesquare.c122
-rw-r--r--src/Specific/solinas64_2e369m25/freeze.c67
-rw-r--r--src/Specific/solinas64_2e379m19/femul.c118
-rw-r--r--src/Specific/solinas64_2e379m19/fesquare.c111
-rw-r--r--src/Specific/solinas64_2e379m19/freeze.c62
-rw-r--r--src/Specific/solinas64_2e382m105/femul.c154
-rw-r--r--src/Specific/solinas64_2e382m105/fesquare.c144
-rw-r--r--src/Specific/solinas64_2e382m105/freeze.c77
-rw-r--r--src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c66
-rw-r--r--src/Specific/solinas64_2e384m317/femul.c130
-rw-r--r--src/Specific/solinas64_2e384m317/fesquare.c122
-rw-r--r--src/Specific/solinas64_2e384m317/freeze.c67
-rw-r--r--src/Specific/solinas64_2e384m5x2e368m1/freeze.c67
-rw-r--r--src/Specific/solinas64_2e384m79x2e376m1/freeze.c67
-rw-r--r--src/Specific/solinas64_2e401m31/femul.c130
-rw-r--r--src/Specific/solinas64_2e401m31/fesquare.c122
-rw-r--r--src/Specific/solinas64_2e401m31/freeze.c68
-rw-r--r--src/Specific/solinas64_2e413m21/femul.c118
-rw-r--r--src/Specific/solinas64_2e413m21/fesquare.c111
-rw-r--r--src/Specific/solinas64_2e413m21/freeze.c62
-rw-r--r--src/Specific/solinas64_2e414m17/femul.c142
-rw-r--r--src/Specific/solinas64_2e414m17/fesquare.c133
-rw-r--r--src/Specific/solinas64_2e414m17/freeze.c72
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/femul.c164
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/fesquare.c156
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/freeze.c67
-rw-r--r--src/Specific/solinas64_2e444m17/femul.c130
-rw-r--r--src/Specific/solinas64_2e444m17/fesquare.c122
-rw-r--r--src/Specific/solinas64_2e444m17/freeze.c67
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/femul.c164
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/fesquare.c156
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/freeze.c67
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/femul.c164
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/fesquare.c156
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/freeze.c67
-rw-r--r--src/Specific/solinas64_2e452m3/femul.c130
-rw-r--r--src/Specific/solinas64_2e452m3/fesquare.c122
-rw-r--r--src/Specific/solinas64_2e452m3/freeze.c67
-rw-r--r--src/Specific/solinas64_2e468m17/femul.c130
-rw-r--r--src/Specific/solinas64_2e468m17/fesquare.c122
-rw-r--r--src/Specific/solinas64_2e468m17/freeze.c67
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/femul.c164
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/fesquare.c156
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/freeze.c67
-rw-r--r--src/Specific/solinas64_2e488m17/femul.c226
-rw-r--r--src/Specific/solinas64_2e488m17/fesquare.c210
-rw-r--r--src/Specific/solinas64_2e488m17/freeze.c107
-rw-r--r--src/Specific/solinas64_2e489m21/femul.c142
-rw-r--r--src/Specific/solinas64_2e489m21/fesquare.c133
-rw-r--r--src/Specific/solinas64_2e489m21/freeze.c72
-rw-r--r--src/Specific/solinas64_2e495m31/femul.c142
-rw-r--r--src/Specific/solinas64_2e495m31/fesquare.c133
-rw-r--r--src/Specific/solinas64_2e495m31/freeze.c72
-rw-r--r--src/Specific/solinas64_2e510m290x2e496m1/freeze.c116
-rw-r--r--src/Specific/solinas64_2e511m187/femul.c154
-rw-r--r--src/Specific/solinas64_2e511m187/fesquare.c144
-rw-r--r--src/Specific/solinas64_2e511m187/freeze.c77
-rw-r--r--src/Specific/solinas64_2e511m481/femul.c154
-rw-r--r--src/Specific/solinas64_2e511m481/fesquare.c144
-rw-r--r--src/Specific/solinas64_2e511m481/freeze.c77
-rw-r--r--src/Specific/solinas64_2e512m491x2e496m1/freeze.c77
-rw-r--r--src/Specific/solinas64_2e512m569/femul.c154
-rw-r--r--src/Specific/solinas64_2e512m569/fesquare.c144
-rw-r--r--src/Specific/solinas64_2e512m569/freeze.c77
-rw-r--r--src/Specific/solinas64_2e521m1/femul.c154
-rw-r--r--src/Specific/solinas64_2e521m1/fesquare.c144
-rw-r--r--src/Specific/solinas64_2e521m1/freeze.c77
942 files changed, 49141 insertions, 51387 deletions
diff --git a/src/Specific/montgomery32_2e127m1/feadd.c b/src/Specific/montgomery32_2e127m1/feadd.c
index aaa652c86..ab8a6011e 100644
--- a/src/Specific/montgomery32_2e127m1/feadd.c
+++ b/src/Specific/montgomery32_2e127m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint32_t x17; uint8_t x18 = _addcarryx_u32(0x0, x5, x11, &x17);
-{ uint32_t x20; uint8_t x21 = _addcarryx_u32(x18, x7, x13, &x20);
-{ uint32_t x23; uint8_t x24 = _addcarryx_u32(x21, x9, x15, &x23);
-{ uint32_t x26; uint8_t x27 = _addcarryx_u32(x24, x8, x14, &x26);
-{ uint32_t x29; uint8_t x30 = _subborrow_u32(0x0, x17, 0xffffffff, &x29);
-{ uint32_t x32; uint8_t x33 = _subborrow_u32(x30, x20, 0xffffffff, &x32);
-{ uint32_t x35; uint8_t x36 = _subborrow_u32(x33, x23, 0xffffffff, &x35);
-{ uint32_t x38; uint8_t x39 = _subborrow_u32(x36, x26, 0x7fffffff, &x38);
-{ uint32_t _; uint8_t x42 = _subborrow_u32(x39, x27, 0x0, &_);
-{ uint32_t x43 = cmovznz(x42, x38, x26);
-{ uint32_t x44 = cmovznz(x42, x35, x23);
-{ uint32_t x45 = cmovznz(x42, x32, x20);
-{ uint32_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint32_t out[4], const uint32_t in1[4], const uint32_t in2[4]) {
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x14 = in2[3];
+ { const uint32_t x15 = in2[2];
+ { const uint32_t x13 = in2[1];
+ { const uint32_t x11 = in2[0];
+ { uint32_t x17; uint8_t x18 = _addcarryx_u32(0x0, x5, x11, &x17);
+ { uint32_t x20; uint8_t x21 = _addcarryx_u32(x18, x7, x13, &x20);
+ { uint32_t x23; uint8_t x24 = _addcarryx_u32(x21, x9, x15, &x23);
+ { uint32_t x26; uint8_t x27 = _addcarryx_u32(x24, x8, x14, &x26);
+ { uint32_t x29; uint8_t x30 = _subborrow_u32(0x0, x17, 0xffffffff, &x29);
+ { uint32_t x32; uint8_t x33 = _subborrow_u32(x30, x20, 0xffffffff, &x32);
+ { uint32_t x35; uint8_t x36 = _subborrow_u32(x33, x23, 0xffffffff, &x35);
+ { uint32_t x38; uint8_t x39 = _subborrow_u32(x36, x26, 0x7fffffff, &x38);
+ { uint32_t _; uint8_t x42 = _subborrow_u32(x39, x27, 0x0, &_);
+ { uint32_t x43 = cmovznz(x42, x38, x26);
+ { uint32_t x44 = cmovznz(x42, x35, x23);
+ { uint32_t x45 = cmovznz(x42, x32, x20);
+ { uint32_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e127m1/femul.c b/src/Specific/montgomery32_2e127m1/femul.c
index 603b981dd..4eb70f7e4 100644
--- a/src/Specific/montgomery32_2e127m1/femul.c
+++ b/src/Specific/montgomery32_2e127m1/femul.c
@@ -1,136 +1,126 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint32_t x18; uint32_t x17 = _mulx_u32(x5, x11, &x18);
-{ uint32_t x21; uint32_t x20 = _mulx_u32(x5, x13, &x21);
-{ uint32_t x24; uint32_t x23 = _mulx_u32(x5, x15, &x24);
-{ uint32_t x27; uint32_t x26 = _mulx_u32(x5, x14, &x27);
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x18, x20, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x21, x23, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x24, x26, &x35);
-{ uint32_t x38; uint8_t _ = _addcarryx_u32(0x0, x36, x27, &x38);
-{ uint32_t x42; uint32_t x41 = _mulx_u32(x17, 0xffffffff, &x42);
-{ uint32_t x45; uint32_t x44 = _mulx_u32(x17, 0xffffffff, &x45);
-{ uint32_t x48; uint32_t x47 = _mulx_u32(x17, 0xffffffff, &x48);
-{ uint32_t x51; uint32_t x50 = _mulx_u32(x17, 0x7fffffff, &x51);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x42, x44, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x45, x47, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x48, x50, &x59);
-{ uint32_t x62; uint8_t _ = _addcarryx_u32(0x0, x60, x51, &x62);
-{ uint32_t _; uint8_t x66 = _addcarryx_u32(0x0, x17, x41, &_);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x29, x53, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x32, x56, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x35, x59, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x38, x62, &x77);
-{ uint32_t x81; uint32_t x80 = _mulx_u32(x7, x11, &x81);
-{ uint32_t x84; uint32_t x83 = _mulx_u32(x7, x13, &x84);
-{ uint32_t x87; uint32_t x86 = _mulx_u32(x7, x15, &x87);
-{ uint32_t x90; uint32_t x89 = _mulx_u32(x7, x14, &x90);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(0x0, x81, x83, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x84, x86, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x87, x89, &x98);
-{ uint32_t x101; uint8_t _ = _addcarryx_u32(0x0, x99, x90, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(0x0, x68, x80, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x71, x92, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x74, x95, &x110);
-{ uint32_t x113; uint8_t x114 = _addcarryx_u32(x111, x77, x98, &x113);
-{ uint32_t x116; uint8_t x117 = _addcarryx_u32(x114, x78, x101, &x116);
-{ uint32_t x120; uint32_t x119 = _mulx_u32(x104, 0xffffffff, &x120);
-{ uint32_t x123; uint32_t x122 = _mulx_u32(x104, 0xffffffff, &x123);
-{ uint32_t x126; uint32_t x125 = _mulx_u32(x104, 0xffffffff, &x126);
-{ uint32_t x129; uint32_t x128 = _mulx_u32(x104, 0x7fffffff, &x129);
-{ uint32_t x131; uint8_t x132 = _addcarryx_u32(0x0, x120, x122, &x131);
-{ uint32_t x134; uint8_t x135 = _addcarryx_u32(x132, x123, x125, &x134);
-{ uint32_t x137; uint8_t x138 = _addcarryx_u32(x135, x126, x128, &x137);
-{ uint32_t x140; uint8_t _ = _addcarryx_u32(0x0, x138, x129, &x140);
-{ uint32_t _; uint8_t x144 = _addcarryx_u32(0x0, x104, x119, &_);
-{ uint32_t x146; uint8_t x147 = _addcarryx_u32(x144, x107, x131, &x146);
-{ uint32_t x149; uint8_t x150 = _addcarryx_u32(x147, x110, x134, &x149);
-{ uint32_t x152; uint8_t x153 = _addcarryx_u32(x150, x113, x137, &x152);
-{ uint32_t x155; uint8_t x156 = _addcarryx_u32(x153, x116, x140, &x155);
-{ uint8_t x157 = (x156 + x117);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x9, x11, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x9, x13, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x9, x15, &x166);
-{ uint32_t x169; uint32_t x168 = _mulx_u32(x9, x14, &x169);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(0x0, x160, x162, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x163, x165, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x166, x168, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x169, &x180);
-{ uint32_t x183; uint8_t x184 = _addcarryx_u32(0x0, x146, x159, &x183);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x149, x171, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x152, x174, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x155, x177, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x157, x180, &x195);
-{ uint32_t x199; uint32_t x198 = _mulx_u32(x183, 0xffffffff, &x199);
-{ uint32_t x202; uint32_t x201 = _mulx_u32(x183, 0xffffffff, &x202);
-{ uint32_t x205; uint32_t x204 = _mulx_u32(x183, 0xffffffff, &x205);
-{ uint32_t x208; uint32_t x207 = _mulx_u32(x183, 0x7fffffff, &x208);
-{ uint32_t x210; uint8_t x211 = _addcarryx_u32(0x0, x199, x201, &x210);
-{ uint32_t x213; uint8_t x214 = _addcarryx_u32(x211, x202, x204, &x213);
-{ uint32_t x216; uint8_t x217 = _addcarryx_u32(x214, x205, x207, &x216);
-{ uint32_t x219; uint8_t _ = _addcarryx_u32(0x0, x217, x208, &x219);
-{ uint32_t _; uint8_t x223 = _addcarryx_u32(0x0, x183, x198, &_);
-{ uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x186, x210, &x225);
-{ uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x189, x213, &x228);
-{ uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x192, x216, &x231);
-{ uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x195, x219, &x234);
-{ uint8_t x236 = (x235 + x196);
-{ uint32_t x239; uint32_t x238 = _mulx_u32(x8, x11, &x239);
-{ uint32_t x242; uint32_t x241 = _mulx_u32(x8, x13, &x242);
-{ uint32_t x245; uint32_t x244 = _mulx_u32(x8, x15, &x245);
-{ uint32_t x248; uint32_t x247 = _mulx_u32(x8, x14, &x248);
-{ uint32_t x250; uint8_t x251 = _addcarryx_u32(0x0, x239, x241, &x250);
-{ uint32_t x253; uint8_t x254 = _addcarryx_u32(x251, x242, x244, &x253);
-{ uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x245, x247, &x256);
-{ uint32_t x259; uint8_t _ = _addcarryx_u32(0x0, x257, x248, &x259);
-{ uint32_t x262; uint8_t x263 = _addcarryx_u32(0x0, x225, x238, &x262);
-{ uint32_t x265; uint8_t x266 = _addcarryx_u32(x263, x228, x250, &x265);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x231, x253, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x234, x256, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x236, x259, &x274);
-{ uint32_t x278; uint32_t x277 = _mulx_u32(x262, 0xffffffff, &x278);
-{ uint32_t x281; uint32_t x280 = _mulx_u32(x262, 0xffffffff, &x281);
-{ uint32_t x284; uint32_t x283 = _mulx_u32(x262, 0xffffffff, &x284);
-{ uint32_t x287; uint32_t x286 = _mulx_u32(x262, 0x7fffffff, &x287);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(0x0, x278, x280, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x281, x283, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x284, x286, &x295);
-{ uint32_t x298; uint8_t _ = _addcarryx_u32(0x0, x296, x287, &x298);
-{ uint32_t _; uint8_t x302 = _addcarryx_u32(0x0, x262, x277, &_);
-{ uint32_t x304; uint8_t x305 = _addcarryx_u32(x302, x265, x289, &x304);
-{ uint32_t x307; uint8_t x308 = _addcarryx_u32(x305, x268, x292, &x307);
-{ uint32_t x310; uint8_t x311 = _addcarryx_u32(x308, x271, x295, &x310);
-{ uint32_t x313; uint8_t x314 = _addcarryx_u32(x311, x274, x298, &x313);
-{ uint8_t x315 = (x314 + x275);
-{ uint32_t x317; uint8_t x318 = _subborrow_u32(0x0, x304, 0xffffffff, &x317);
-{ uint32_t x320; uint8_t x321 = _subborrow_u32(x318, x307, 0xffffffff, &x320);
-{ uint32_t x323; uint8_t x324 = _subborrow_u32(x321, x310, 0xffffffff, &x323);
-{ uint32_t x326; uint8_t x327 = _subborrow_u32(x324, x313, 0x7fffffff, &x326);
-{ uint32_t _; uint8_t x330 = _subborrow_u32(x327, x315, 0x0, &_);
-{ uint32_t x331 = cmovznz(x330, x326, x313);
-{ uint32_t x332 = cmovznz(x330, x323, x310);
-{ uint32_t x333 = cmovznz(x330, x320, x307);
-{ uint32_t x334 = cmovznz(x330, x317, x304);
-out[0] = x331;
-out[1] = x332;
-out[2] = x333;
-out[3] = x334;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint32_t out[4], const uint32_t in1[4], const uint32_t in2[4]) {
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x14 = in2[3];
+ { const uint32_t x15 = in2[2];
+ { const uint32_t x13 = in2[1];
+ { const uint32_t x11 = in2[0];
+ { uint32_t x18; uint32_t x17 = _mulx_u32(x5, x11, &x18);
+ { uint32_t x21; uint32_t x20 = _mulx_u32(x5, x13, &x21);
+ { uint32_t x24; uint32_t x23 = _mulx_u32(x5, x15, &x24);
+ { uint32_t x27; uint32_t x26 = _mulx_u32(x5, x14, &x27);
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x18, x20, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x21, x23, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x24, x26, &x35);
+ { uint32_t x38; uint8_t _ = _addcarryx_u32(0x0, x36, x27, &x38);
+ { uint32_t x42; uint32_t x41 = _mulx_u32(x17, 0xffffffff, &x42);
+ { uint32_t x45; uint32_t x44 = _mulx_u32(x17, 0xffffffff, &x45);
+ { uint32_t x48; uint32_t x47 = _mulx_u32(x17, 0xffffffff, &x48);
+ { uint32_t x51; uint32_t x50 = _mulx_u32(x17, 0x7fffffff, &x51);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x42, x44, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x45, x47, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x48, x50, &x59);
+ { uint32_t x62; uint8_t _ = _addcarryx_u32(0x0, x60, x51, &x62);
+ { uint32_t _; uint8_t x66 = _addcarryx_u32(0x0, x17, x41, &_);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x29, x53, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x32, x56, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x35, x59, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x38, x62, &x77);
+ { uint32_t x81; uint32_t x80 = _mulx_u32(x7, x11, &x81);
+ { uint32_t x84; uint32_t x83 = _mulx_u32(x7, x13, &x84);
+ { uint32_t x87; uint32_t x86 = _mulx_u32(x7, x15, &x87);
+ { uint32_t x90; uint32_t x89 = _mulx_u32(x7, x14, &x90);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(0x0, x81, x83, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x84, x86, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x87, x89, &x98);
+ { uint32_t x101; uint8_t _ = _addcarryx_u32(0x0, x99, x90, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(0x0, x68, x80, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x71, x92, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x74, x95, &x110);
+ { uint32_t x113; uint8_t x114 = _addcarryx_u32(x111, x77, x98, &x113);
+ { uint32_t x116; uint8_t x117 = _addcarryx_u32(x114, x78, x101, &x116);
+ { uint32_t x120; uint32_t x119 = _mulx_u32(x104, 0xffffffff, &x120);
+ { uint32_t x123; uint32_t x122 = _mulx_u32(x104, 0xffffffff, &x123);
+ { uint32_t x126; uint32_t x125 = _mulx_u32(x104, 0xffffffff, &x126);
+ { uint32_t x129; uint32_t x128 = _mulx_u32(x104, 0x7fffffff, &x129);
+ { uint32_t x131; uint8_t x132 = _addcarryx_u32(0x0, x120, x122, &x131);
+ { uint32_t x134; uint8_t x135 = _addcarryx_u32(x132, x123, x125, &x134);
+ { uint32_t x137; uint8_t x138 = _addcarryx_u32(x135, x126, x128, &x137);
+ { uint32_t x140; uint8_t _ = _addcarryx_u32(0x0, x138, x129, &x140);
+ { uint32_t _; uint8_t x144 = _addcarryx_u32(0x0, x104, x119, &_);
+ { uint32_t x146; uint8_t x147 = _addcarryx_u32(x144, x107, x131, &x146);
+ { uint32_t x149; uint8_t x150 = _addcarryx_u32(x147, x110, x134, &x149);
+ { uint32_t x152; uint8_t x153 = _addcarryx_u32(x150, x113, x137, &x152);
+ { uint32_t x155; uint8_t x156 = _addcarryx_u32(x153, x116, x140, &x155);
+ { uint8_t x157 = (x156 + x117);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x9, x11, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x9, x13, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x9, x15, &x166);
+ { uint32_t x169; uint32_t x168 = _mulx_u32(x9, x14, &x169);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(0x0, x160, x162, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x163, x165, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x166, x168, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x169, &x180);
+ { uint32_t x183; uint8_t x184 = _addcarryx_u32(0x0, x146, x159, &x183);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x149, x171, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x152, x174, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x155, x177, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x157, x180, &x195);
+ { uint32_t x199; uint32_t x198 = _mulx_u32(x183, 0xffffffff, &x199);
+ { uint32_t x202; uint32_t x201 = _mulx_u32(x183, 0xffffffff, &x202);
+ { uint32_t x205; uint32_t x204 = _mulx_u32(x183, 0xffffffff, &x205);
+ { uint32_t x208; uint32_t x207 = _mulx_u32(x183, 0x7fffffff, &x208);
+ { uint32_t x210; uint8_t x211 = _addcarryx_u32(0x0, x199, x201, &x210);
+ { uint32_t x213; uint8_t x214 = _addcarryx_u32(x211, x202, x204, &x213);
+ { uint32_t x216; uint8_t x217 = _addcarryx_u32(x214, x205, x207, &x216);
+ { uint32_t x219; uint8_t _ = _addcarryx_u32(0x0, x217, x208, &x219);
+ { uint32_t _; uint8_t x223 = _addcarryx_u32(0x0, x183, x198, &_);
+ { uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x186, x210, &x225);
+ { uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x189, x213, &x228);
+ { uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x192, x216, &x231);
+ { uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x195, x219, &x234);
+ { uint8_t x236 = (x235 + x196);
+ { uint32_t x239; uint32_t x238 = _mulx_u32(x8, x11, &x239);
+ { uint32_t x242; uint32_t x241 = _mulx_u32(x8, x13, &x242);
+ { uint32_t x245; uint32_t x244 = _mulx_u32(x8, x15, &x245);
+ { uint32_t x248; uint32_t x247 = _mulx_u32(x8, x14, &x248);
+ { uint32_t x250; uint8_t x251 = _addcarryx_u32(0x0, x239, x241, &x250);
+ { uint32_t x253; uint8_t x254 = _addcarryx_u32(x251, x242, x244, &x253);
+ { uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x245, x247, &x256);
+ { uint32_t x259; uint8_t _ = _addcarryx_u32(0x0, x257, x248, &x259);
+ { uint32_t x262; uint8_t x263 = _addcarryx_u32(0x0, x225, x238, &x262);
+ { uint32_t x265; uint8_t x266 = _addcarryx_u32(x263, x228, x250, &x265);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x231, x253, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x234, x256, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x236, x259, &x274);
+ { uint32_t x278; uint32_t x277 = _mulx_u32(x262, 0xffffffff, &x278);
+ { uint32_t x281; uint32_t x280 = _mulx_u32(x262, 0xffffffff, &x281);
+ { uint32_t x284; uint32_t x283 = _mulx_u32(x262, 0xffffffff, &x284);
+ { uint32_t x287; uint32_t x286 = _mulx_u32(x262, 0x7fffffff, &x287);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(0x0, x278, x280, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x281, x283, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x284, x286, &x295);
+ { uint32_t x298; uint8_t _ = _addcarryx_u32(0x0, x296, x287, &x298);
+ { uint32_t _; uint8_t x302 = _addcarryx_u32(0x0, x262, x277, &_);
+ { uint32_t x304; uint8_t x305 = _addcarryx_u32(x302, x265, x289, &x304);
+ { uint32_t x307; uint8_t x308 = _addcarryx_u32(x305, x268, x292, &x307);
+ { uint32_t x310; uint8_t x311 = _addcarryx_u32(x308, x271, x295, &x310);
+ { uint32_t x313; uint8_t x314 = _addcarryx_u32(x311, x274, x298, &x313);
+ { uint8_t x315 = (x314 + x275);
+ { uint32_t x317; uint8_t x318 = _subborrow_u32(0x0, x304, 0xffffffff, &x317);
+ { uint32_t x320; uint8_t x321 = _subborrow_u32(x318, x307, 0xffffffff, &x320);
+ { uint32_t x323; uint8_t x324 = _subborrow_u32(x321, x310, 0xffffffff, &x323);
+ { uint32_t x326; uint8_t x327 = _subborrow_u32(x324, x313, 0x7fffffff, &x326);
+ { uint32_t _; uint8_t x330 = _subborrow_u32(x327, x315, 0x0, &_);
+ { uint32_t x331 = cmovznz(x330, x326, x313);
+ { uint32_t x332 = cmovznz(x330, x323, x310);
+ { uint32_t x333 = cmovznz(x330, x320, x307);
+ { uint32_t x334 = cmovznz(x330, x317, x304);
+ out[0] = x334;
+ out[1] = x333;
+ out[2] = x332;
+ out[3] = x331;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e127m1/fenz.c b/src/Specific/montgomery32_2e127m1/fenz.c
index 308da273a..65f47ec44 100644
--- a/src/Specific/montgomery32_2e127m1/fenz.c
+++ b/src/Specific/montgomery32_2e127m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x7 = (x6 | x5);
-{ uint32_t x8 = (x4 | x7);
-{ uint32_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[4]) {
+ { const uint32_t x5 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x7 = (x6 | x5);
+ { uint32_t x8 = (x4 | x7);
+ { uint32_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e127m1/feopp.c b/src/Specific/montgomery32_2e127m1/feopp.c
index db5acedf9..92d7b9903 100644
--- a/src/Specific/montgomery32_2e127m1/feopp.c
+++ b/src/Specific/montgomery32_2e127m1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x8; uint8_t x9 = _subborrow_u32(0x0, 0x0, x2, &x8);
-{ uint32_t x11; uint8_t x12 = _subborrow_u32(x9, 0x0, x4, &x11);
-{ uint32_t x14; uint8_t x15 = _subborrow_u32(x12, 0x0, x6, &x14);
-{ uint32_t x17; uint8_t x18 = _subborrow_u32(x15, 0x0, x5, &x17);
-{ uint32_t x19 = (uint32_t)cmovznz(x18, 0x0, 0xffffffff);
-{ uint32_t x20 = (x19 & 0xffffffff);
-{ uint32_t x22; uint8_t x23 = _addcarryx_u32(0x0, x8, x20, &x22);
-{ uint32_t x24 = (x19 & 0xffffffff);
-{ uint32_t x26; uint8_t x27 = _addcarryx_u32(x23, x11, x24, &x26);
-{ uint32_t x28 = (x19 & 0xffffffff);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x27, x14, x28, &x30);
-{ uint32_t x32 = (x19 & 0x7fffffff);
-{ uint32_t x34; uint8_t _ = _addcarryx_u32(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint32_t out[4], const uint32_t in1[4]) {
+ { const uint32_t x5 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x8; uint8_t x9 = _subborrow_u32(0x0, 0x0, x2, &x8);
+ { uint32_t x11; uint8_t x12 = _subborrow_u32(x9, 0x0, x4, &x11);
+ { uint32_t x14; uint8_t x15 = _subborrow_u32(x12, 0x0, x6, &x14);
+ { uint32_t x17; uint8_t x18 = _subborrow_u32(x15, 0x0, x5, &x17);
+ { uint32_t x19 = (uint32_t)cmovznz(x18, 0x0, 0xffffffff);
+ { uint32_t x20 = (x19 & 0xffffffff);
+ { uint32_t x22; uint8_t x23 = _addcarryx_u32(0x0, x8, x20, &x22);
+ { uint32_t x24 = (x19 & 0xffffffff);
+ { uint32_t x26; uint8_t x27 = _addcarryx_u32(x23, x11, x24, &x26);
+ { uint32_t x28 = (x19 & 0xffffffff);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x27, x14, x28, &x30);
+ { uint32_t x32 = (x19 & 0x7fffffff);
+ { uint32_t x34; uint8_t _ = _addcarryx_u32(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e127m1/fesub.c b/src/Specific/montgomery32_2e127m1/fesub.c
index 573f8a4f7..1277222cd 100644
--- a/src/Specific/montgomery32_2e127m1/fesub.c
+++ b/src/Specific/montgomery32_2e127m1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint32_t x17; uint8_t x18 = _subborrow_u32(0x0, x5, x11, &x17);
-{ uint32_t x20; uint8_t x21 = _subborrow_u32(x18, x7, x13, &x20);
-{ uint32_t x23; uint8_t x24 = _subborrow_u32(x21, x9, x15, &x23);
-{ uint32_t x26; uint8_t x27 = _subborrow_u32(x24, x8, x14, &x26);
-{ uint32_t x28 = (uint32_t)cmovznz(x27, 0x0, 0xffffffff);
-{ uint32_t x29 = (x28 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(0x0, x17, x29, &x31);
-{ uint32_t x33 = (x28 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x20, x33, &x35);
-{ uint32_t x37 = (x28 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x23, x37, &x39);
-{ uint32_t x41 = (x28 & 0x7fffffff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint32_t out[4], const uint32_t in1[4], const uint32_t in2[4]) {
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x14 = in2[3];
+ { const uint32_t x15 = in2[2];
+ { const uint32_t x13 = in2[1];
+ { const uint32_t x11 = in2[0];
+ { uint32_t x17; uint8_t x18 = _subborrow_u32(0x0, x5, x11, &x17);
+ { uint32_t x20; uint8_t x21 = _subborrow_u32(x18, x7, x13, &x20);
+ { uint32_t x23; uint8_t x24 = _subborrow_u32(x21, x9, x15, &x23);
+ { uint32_t x26; uint8_t x27 = _subborrow_u32(x24, x8, x14, &x26);
+ { uint32_t x28 = (uint32_t)cmovznz(x27, 0x0, 0xffffffff);
+ { uint32_t x29 = (x28 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(0x0, x17, x29, &x31);
+ { uint32_t x33 = (x28 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x20, x33, &x35);
+ { uint32_t x37 = (x28 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x23, x37, &x39);
+ { uint32_t x41 = (x28 & 0x7fffffff);
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e129m25/feadd.c b/src/Specific/montgomery32_2e129m25/feadd.c
index 51faddc1d..45c2ffda2 100644
--- a/src/Specific/montgomery32_2e129m25/feadd.c
+++ b/src/Specific/montgomery32_2e129m25/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffe7, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffe7, &x36);
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1, &x48);
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
+ { uint32_t x53 = cmovznz(x52, x48, x33);
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57;
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e129m25/femul.c b/src/Specific/montgomery32_2e129m25/femul.c
index b4e56de2d..2b111a22e 100644
--- a/src/Specific/montgomery32_2e129m25/femul.c
+++ b/src/Specific/montgomery32_2e129m25/femul.c
@@ -1,190 +1,182 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xc28f5c29, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffe7, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(0x0, x55, x57, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x58, x60, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x61, x63, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x64, x51, &x75);
-{ uint32_t _; uint8_t x79 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x36, x66, &x81);
-{ uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x39, x69, &x84);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x42, x72, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x45, x75, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x48, x76, &x93);
-{ uint32_t x97; uint32_t x96 = _mulx_u32(x7, x13, &x97);
-{ uint32_t x100; uint32_t x99 = _mulx_u32(x7, x15, &x100);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x17, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x19, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x18, &x109);
-{ uint32_t x111; uint8_t x112 = _addcarryx_u32(0x0, x97, x99, &x111);
-{ uint32_t x114; uint8_t x115 = _addcarryx_u32(x112, x100, x102, &x114);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(x115, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t _ = _addcarryx_u32(0x0, x121, x109, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(0x0, x81, x96, &x126);
-{ uint32_t x129; uint8_t x130 = _addcarryx_u32(x127, x84, x111, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(x130, x87, x114, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x94, x123, &x141);
-{ uint32_t _; uint32_t x144 = _mulx_u32(x126, 0xc28f5c29, &_);
-{ uint32_t x148; uint32_t x147 = _mulx_u32(x144, 0xffffffe7, &x148);
-{ uint32_t x151; uint32_t x150 = _mulx_u32(x144, 0xffffffff, &x151);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x144, 0xffffffff, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x144, 0xffffffff, &x157);
-{ uint32_t x159; uint8_t x160 = _addcarryx_u32(0x0, x148, x150, &x159);
-{ uint32_t x162; uint8_t x163 = _addcarryx_u32(x160, x151, x153, &x162);
-{ uint32_t x165; uint8_t x166 = _addcarryx_u32(x163, x154, x156, &x165);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(x166, x157, x144, &x168);
-{ uint32_t _; uint8_t x172 = _addcarryx_u32(0x0, x126, x147, &_);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x129, x159, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x132, x162, &x177);
-{ uint32_t x180; uint8_t x181 = _addcarryx_u32(x178, x135, x165, &x180);
-{ uint32_t x183; uint8_t x184 = _addcarryx_u32(x181, x138, x168, &x183);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x141, x169, &x186);
-{ uint8_t x188 = (x187 + x142);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x9, x13, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x9, x15, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x9, x17, &x197);
-{ uint32_t x200; uint32_t x199 = _mulx_u32(x9, x19, &x200);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x18, &x203);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(0x0, x191, x193, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x194, x196, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x197, x199, &x211);
-{ uint32_t x214; uint8_t x215 = _addcarryx_u32(x212, x200, x202, &x214);
-{ uint32_t x217; uint8_t _ = _addcarryx_u32(0x0, x215, x203, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(0x0, x174, x190, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x177, x205, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x180, x208, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x183, x211, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x186, x214, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x188, x217, &x235);
-{ uint32_t _; uint32_t x238 = _mulx_u32(x220, 0xc28f5c29, &_);
-{ uint32_t x242; uint32_t x241 = _mulx_u32(x238, 0xffffffe7, &x242);
-{ uint32_t x245; uint32_t x244 = _mulx_u32(x238, 0xffffffff, &x245);
-{ uint32_t x248; uint32_t x247 = _mulx_u32(x238, 0xffffffff, &x248);
-{ uint32_t x251; uint32_t x250 = _mulx_u32(x238, 0xffffffff, &x251);
-{ uint32_t x253; uint8_t x254 = _addcarryx_u32(0x0, x242, x244, &x253);
-{ uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x245, x247, &x256);
-{ uint32_t x259; uint8_t x260 = _addcarryx_u32(x257, x248, x250, &x259);
-{ uint32_t x262; uint8_t x263 = _addcarryx_u32(x260, x251, x238, &x262);
-{ uint32_t _; uint8_t x266 = _addcarryx_u32(0x0, x220, x241, &_);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x223, x253, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x226, x256, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x229, x259, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x232, x262, &x277);
-{ uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x235, x263, &x280);
-{ uint8_t x282 = (x281 + x236);
-{ uint32_t x285; uint32_t x284 = _mulx_u32(x11, x13, &x285);
-{ uint32_t x288; uint32_t x287 = _mulx_u32(x11, x15, &x288);
-{ uint32_t x291; uint32_t x290 = _mulx_u32(x11, x17, &x291);
-{ uint32_t x294; uint32_t x293 = _mulx_u32(x11, x19, &x294);
-{ uint32_t x297; uint32_t x296 = _mulx_u32(x11, x18, &x297);
-{ uint32_t x299; uint8_t x300 = _addcarryx_u32(0x0, x285, x287, &x299);
-{ uint32_t x302; uint8_t x303 = _addcarryx_u32(x300, x288, x290, &x302);
-{ uint32_t x305; uint8_t x306 = _addcarryx_u32(x303, x291, x293, &x305);
-{ uint32_t x308; uint8_t x309 = _addcarryx_u32(x306, x294, x296, &x308);
-{ uint32_t x311; uint8_t _ = _addcarryx_u32(0x0, x309, x297, &x311);
-{ uint32_t x314; uint8_t x315 = _addcarryx_u32(0x0, x268, x284, &x314);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x271, x299, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x274, x302, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x277, x305, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x280, x308, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x282, x311, &x329);
-{ uint32_t _; uint32_t x332 = _mulx_u32(x314, 0xc28f5c29, &_);
-{ uint32_t x336; uint32_t x335 = _mulx_u32(x332, 0xffffffe7, &x336);
-{ uint32_t x339; uint32_t x338 = _mulx_u32(x332, 0xffffffff, &x339);
-{ uint32_t x342; uint32_t x341 = _mulx_u32(x332, 0xffffffff, &x342);
-{ uint32_t x345; uint32_t x344 = _mulx_u32(x332, 0xffffffff, &x345);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(0x0, x336, x338, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x339, x341, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x342, x344, &x353);
-{ uint32_t x356; uint8_t x357 = _addcarryx_u32(x354, x345, x332, &x356);
-{ uint32_t _; uint8_t x360 = _addcarryx_u32(0x0, x314, x335, &_);
-{ uint32_t x362; uint8_t x363 = _addcarryx_u32(x360, x317, x347, &x362);
-{ uint32_t x365; uint8_t x366 = _addcarryx_u32(x363, x320, x350, &x365);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(x366, x323, x353, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x326, x356, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x329, x357, &x374);
-{ uint8_t x376 = (x375 + x330);
-{ uint32_t x379; uint32_t x378 = _mulx_u32(x10, x13, &x379);
-{ uint32_t x382; uint32_t x381 = _mulx_u32(x10, x15, &x382);
-{ uint32_t x385; uint32_t x384 = _mulx_u32(x10, x17, &x385);
-{ uint32_t x388; uint32_t x387 = _mulx_u32(x10, x19, &x388);
-{ uint32_t x391; uint32_t x390 = _mulx_u32(x10, x18, &x391);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x379, x381, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x382, x384, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x385, x387, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x388, x390, &x402);
-{ uint32_t x405; uint8_t _ = _addcarryx_u32(0x0, x403, x391, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(0x0, x362, x378, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x365, x393, &x411);
-{ uint32_t x414; uint8_t x415 = _addcarryx_u32(x412, x368, x396, &x414);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(x415, x371, x399, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x374, x402, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x376, x405, &x423);
-{ uint32_t _; uint32_t x426 = _mulx_u32(x408, 0xc28f5c29, &_);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x426, 0xffffffe7, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x426, 0xffffffff, &x433);
-{ uint32_t x436; uint32_t x435 = _mulx_u32(x426, 0xffffffff, &x436);
-{ uint32_t x439; uint32_t x438 = _mulx_u32(x426, 0xffffffff, &x439);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(0x0, x430, x432, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x433, x435, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x436, x438, &x447);
-{ uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x439, x426, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x408, x429, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x411, x441, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x414, x444, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x417, x447, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x420, x450, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x423, x451, &x468);
-{ uint8_t x470 = (x469 + x424);
-{ uint32_t x472; uint8_t x473 = _subborrow_u32(0x0, x456, 0xffffffe7, &x472);
-{ uint32_t x475; uint8_t x476 = _subborrow_u32(x473, x459, 0xffffffff, &x475);
-{ uint32_t x478; uint8_t x479 = _subborrow_u32(x476, x462, 0xffffffff, &x478);
-{ uint32_t x481; uint8_t x482 = _subborrow_u32(x479, x465, 0xffffffff, &x481);
-{ uint32_t x484; uint8_t x485 = _subborrow_u32(x482, x468, 0x1, &x484);
-{ uint32_t _; uint8_t x488 = _subborrow_u32(x485, x470, 0x0, &_);
-{ uint32_t x489 = cmovznz(x488, x484, x468);
-{ uint32_t x490 = cmovznz(x488, x481, x465);
-{ uint32_t x491 = cmovznz(x488, x478, x462);
-{ uint32_t x492 = cmovznz(x488, x475, x459);
-{ uint32_t x493 = cmovznz(x488, x472, x456);
-out[0] = x489;
-out[1] = x490;
-out[2] = x491;
-out[3] = x492;
-out[4] = x493;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xc28f5c29, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffe7, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(0x0, x55, x57, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x58, x60, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x61, x63, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x64, x51, &x75);
+ { uint32_t _; uint8_t x79 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x36, x66, &x81);
+ { uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x39, x69, &x84);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x42, x72, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x45, x75, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x48, x76, &x93);
+ { uint32_t x97; uint32_t x96 = _mulx_u32(x7, x13, &x97);
+ { uint32_t x100; uint32_t x99 = _mulx_u32(x7, x15, &x100);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x17, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x19, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x18, &x109);
+ { uint32_t x111; uint8_t x112 = _addcarryx_u32(0x0, x97, x99, &x111);
+ { uint32_t x114; uint8_t x115 = _addcarryx_u32(x112, x100, x102, &x114);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(x115, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t _ = _addcarryx_u32(0x0, x121, x109, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(0x0, x81, x96, &x126);
+ { uint32_t x129; uint8_t x130 = _addcarryx_u32(x127, x84, x111, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(x130, x87, x114, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x94, x123, &x141);
+ { uint32_t _; uint32_t x144 = _mulx_u32(x126, 0xc28f5c29, &_);
+ { uint32_t x148; uint32_t x147 = _mulx_u32(x144, 0xffffffe7, &x148);
+ { uint32_t x151; uint32_t x150 = _mulx_u32(x144, 0xffffffff, &x151);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x144, 0xffffffff, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x144, 0xffffffff, &x157);
+ { uint32_t x159; uint8_t x160 = _addcarryx_u32(0x0, x148, x150, &x159);
+ { uint32_t x162; uint8_t x163 = _addcarryx_u32(x160, x151, x153, &x162);
+ { uint32_t x165; uint8_t x166 = _addcarryx_u32(x163, x154, x156, &x165);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(x166, x157, x144, &x168);
+ { uint32_t _; uint8_t x172 = _addcarryx_u32(0x0, x126, x147, &_);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x129, x159, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x132, x162, &x177);
+ { uint32_t x180; uint8_t x181 = _addcarryx_u32(x178, x135, x165, &x180);
+ { uint32_t x183; uint8_t x184 = _addcarryx_u32(x181, x138, x168, &x183);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x141, x169, &x186);
+ { uint8_t x188 = (x187 + x142);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x9, x13, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x9, x15, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x9, x17, &x197);
+ { uint32_t x200; uint32_t x199 = _mulx_u32(x9, x19, &x200);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x18, &x203);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(0x0, x191, x193, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x194, x196, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x197, x199, &x211);
+ { uint32_t x214; uint8_t x215 = _addcarryx_u32(x212, x200, x202, &x214);
+ { uint32_t x217; uint8_t _ = _addcarryx_u32(0x0, x215, x203, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(0x0, x174, x190, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x177, x205, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x180, x208, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x183, x211, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x186, x214, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x188, x217, &x235);
+ { uint32_t _; uint32_t x238 = _mulx_u32(x220, 0xc28f5c29, &_);
+ { uint32_t x242; uint32_t x241 = _mulx_u32(x238, 0xffffffe7, &x242);
+ { uint32_t x245; uint32_t x244 = _mulx_u32(x238, 0xffffffff, &x245);
+ { uint32_t x248; uint32_t x247 = _mulx_u32(x238, 0xffffffff, &x248);
+ { uint32_t x251; uint32_t x250 = _mulx_u32(x238, 0xffffffff, &x251);
+ { uint32_t x253; uint8_t x254 = _addcarryx_u32(0x0, x242, x244, &x253);
+ { uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x245, x247, &x256);
+ { uint32_t x259; uint8_t x260 = _addcarryx_u32(x257, x248, x250, &x259);
+ { uint32_t x262; uint8_t x263 = _addcarryx_u32(x260, x251, x238, &x262);
+ { uint32_t _; uint8_t x266 = _addcarryx_u32(0x0, x220, x241, &_);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x223, x253, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x226, x256, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x229, x259, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x232, x262, &x277);
+ { uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x235, x263, &x280);
+ { uint8_t x282 = (x281 + x236);
+ { uint32_t x285; uint32_t x284 = _mulx_u32(x11, x13, &x285);
+ { uint32_t x288; uint32_t x287 = _mulx_u32(x11, x15, &x288);
+ { uint32_t x291; uint32_t x290 = _mulx_u32(x11, x17, &x291);
+ { uint32_t x294; uint32_t x293 = _mulx_u32(x11, x19, &x294);
+ { uint32_t x297; uint32_t x296 = _mulx_u32(x11, x18, &x297);
+ { uint32_t x299; uint8_t x300 = _addcarryx_u32(0x0, x285, x287, &x299);
+ { uint32_t x302; uint8_t x303 = _addcarryx_u32(x300, x288, x290, &x302);
+ { uint32_t x305; uint8_t x306 = _addcarryx_u32(x303, x291, x293, &x305);
+ { uint32_t x308; uint8_t x309 = _addcarryx_u32(x306, x294, x296, &x308);
+ { uint32_t x311; uint8_t _ = _addcarryx_u32(0x0, x309, x297, &x311);
+ { uint32_t x314; uint8_t x315 = _addcarryx_u32(0x0, x268, x284, &x314);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x271, x299, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x274, x302, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x277, x305, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x280, x308, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x282, x311, &x329);
+ { uint32_t _; uint32_t x332 = _mulx_u32(x314, 0xc28f5c29, &_);
+ { uint32_t x336; uint32_t x335 = _mulx_u32(x332, 0xffffffe7, &x336);
+ { uint32_t x339; uint32_t x338 = _mulx_u32(x332, 0xffffffff, &x339);
+ { uint32_t x342; uint32_t x341 = _mulx_u32(x332, 0xffffffff, &x342);
+ { uint32_t x345; uint32_t x344 = _mulx_u32(x332, 0xffffffff, &x345);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(0x0, x336, x338, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x339, x341, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x342, x344, &x353);
+ { uint32_t x356; uint8_t x357 = _addcarryx_u32(x354, x345, x332, &x356);
+ { uint32_t _; uint8_t x360 = _addcarryx_u32(0x0, x314, x335, &_);
+ { uint32_t x362; uint8_t x363 = _addcarryx_u32(x360, x317, x347, &x362);
+ { uint32_t x365; uint8_t x366 = _addcarryx_u32(x363, x320, x350, &x365);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(x366, x323, x353, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x326, x356, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x329, x357, &x374);
+ { uint8_t x376 = (x375 + x330);
+ { uint32_t x379; uint32_t x378 = _mulx_u32(x10, x13, &x379);
+ { uint32_t x382; uint32_t x381 = _mulx_u32(x10, x15, &x382);
+ { uint32_t x385; uint32_t x384 = _mulx_u32(x10, x17, &x385);
+ { uint32_t x388; uint32_t x387 = _mulx_u32(x10, x19, &x388);
+ { uint32_t x391; uint32_t x390 = _mulx_u32(x10, x18, &x391);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x379, x381, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x382, x384, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x385, x387, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x388, x390, &x402);
+ { uint32_t x405; uint8_t _ = _addcarryx_u32(0x0, x403, x391, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(0x0, x362, x378, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x365, x393, &x411);
+ { uint32_t x414; uint8_t x415 = _addcarryx_u32(x412, x368, x396, &x414);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(x415, x371, x399, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x374, x402, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x376, x405, &x423);
+ { uint32_t _; uint32_t x426 = _mulx_u32(x408, 0xc28f5c29, &_);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x426, 0xffffffe7, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x426, 0xffffffff, &x433);
+ { uint32_t x436; uint32_t x435 = _mulx_u32(x426, 0xffffffff, &x436);
+ { uint32_t x439; uint32_t x438 = _mulx_u32(x426, 0xffffffff, &x439);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(0x0, x430, x432, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x433, x435, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x436, x438, &x447);
+ { uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x439, x426, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x408, x429, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x411, x441, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x414, x444, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x417, x447, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x420, x450, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x423, x451, &x468);
+ { uint8_t x470 = (x469 + x424);
+ { uint32_t x472; uint8_t x473 = _subborrow_u32(0x0, x456, 0xffffffe7, &x472);
+ { uint32_t x475; uint8_t x476 = _subborrow_u32(x473, x459, 0xffffffff, &x475);
+ { uint32_t x478; uint8_t x479 = _subborrow_u32(x476, x462, 0xffffffff, &x478);
+ { uint32_t x481; uint8_t x482 = _subborrow_u32(x479, x465, 0xffffffff, &x481);
+ { uint32_t x484; uint8_t x485 = _subborrow_u32(x482, x468, 0x1, &x484);
+ { uint32_t _; uint8_t x488 = _subborrow_u32(x485, x470, 0x0, &_);
+ { uint32_t x489 = cmovznz(x488, x484, x468);
+ { uint32_t x490 = cmovznz(x488, x481, x465);
+ { uint32_t x491 = cmovznz(x488, x478, x462);
+ { uint32_t x492 = cmovznz(x488, x475, x459);
+ { uint32_t x493 = cmovznz(x488, x472, x456);
+ out[0] = x493;
+ out[1] = x492;
+ out[2] = x491;
+ out[3] = x490;
+ out[4] = x489;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e129m25/fenz.c b/src/Specific/montgomery32_2e129m25/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e129m25/fenz.c
+++ b/src/Specific/montgomery32_2e129m25/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7);
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11);
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e129m25/feopp.c b/src/Specific/montgomery32_2e129m25/feopp.c
index 4d8e13b5f..c1ce379d3 100644
--- a/src/Specific/montgomery32_2e129m25/feopp.c
+++ b/src/Specific/montgomery32_2e129m25/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xffffffe7);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint8_t x41 = ((uint8_t)x24 & 0x1);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
+ { uint32_t x25 = (x24 & 0xffffffe7);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint8_t x41 = ((uint8_t)x24 & 0x1);
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e129m25/fesub.c b/src/Specific/montgomery32_2e129m25/fesub.c
index 5bc68d56e..119f0d7da 100644
--- a/src/Specific/montgomery32_2e129m25/fesub.c
+++ b/src/Specific/montgomery32_2e129m25/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xffffffe7);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint8_t x52 = ((uint8_t)x35 & 0x1);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
+ { uint32_t x36 = (x35 & 0xffffffe7);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint8_t x52 = ((uint8_t)x35 & 0x1);
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
+ out[0] = x38;
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e130m5/feadd.c b/src/Specific/montgomery32_2e130m5/feadd.c
index d9c9d32b0..29e05d1ec 100644
--- a/src/Specific/montgomery32_2e130m5/feadd.c
+++ b/src/Specific/montgomery32_2e130m5/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffb, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffb, &x36);
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3, &x48);
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
+ { uint32_t x53 = cmovznz(x52, x48, x33);
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57;
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e130m5/femul.c b/src/Specific/montgomery32_2e130m5/femul.c
index 786c579e2..5c5664178 100644
--- a/src/Specific/montgomery32_2e130m5/femul.c
+++ b/src/Specific/montgomery32_2e130m5/femul.c
@@ -1,39 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xcccccccd, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffb, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-out[0] = uint32_t x66;
-out[1] = uint8_t x67 = Op Syntax.MulSplit 32 Syntax.TWord 5 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 3 x51;
-out[2] = 0x3;;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xcccccccd, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffb, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, 0x3);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint8_t x80 = (x79 + x67);
+ { uint32_t _; uint8_t x83 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x36, x69, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x39, x72, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x42, x75, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x45, x78, &x94);
+ { uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x48, x80, &x97);
+ { uint32_t x101; uint32_t x100 = _mulx_u32(x7, x13, &x101);
+ { uint32_t x104; uint32_t x103 = _mulx_u32(x7, x15, &x104);
+ { uint32_t x107; uint32_t x106 = _mulx_u32(x7, x17, &x107);
+ { uint32_t x110; uint32_t x109 = _mulx_u32(x7, x19, &x110);
+ { uint32_t x113; uint32_t x112 = _mulx_u32(x7, x18, &x113);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(0x0, x101, x103, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x104, x106, &x118);
+ { uint32_t x121; uint8_t x122 = _addcarryx_u32(x119, x107, x109, &x121);
+ { uint32_t x124; uint8_t x125 = _addcarryx_u32(x122, x110, x112, &x124);
+ { uint32_t x127; uint8_t _ = _addcarryx_u32(0x0, x125, x113, &x127);
+ { uint32_t x130; uint8_t x131 = _addcarryx_u32(0x0, x85, x100, &x130);
+ { uint32_t x133; uint8_t x134 = _addcarryx_u32(x131, x88, x115, &x133);
+ { uint32_t x136; uint8_t x137 = _addcarryx_u32(x134, x91, x118, &x136);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(x137, x94, x121, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x97, x124, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x98, x127, &x145);
+ { uint32_t _; uint32_t x148 = _mulx_u32(x130, 0xcccccccd, &_);
+ { uint32_t x152; uint32_t x151 = _mulx_u32(x148, 0xfffffffb, &x152);
+ { uint32_t x155; uint32_t x154 = _mulx_u32(x148, 0xffffffff, &x155);
+ { uint32_t x158; uint32_t x157 = _mulx_u32(x148, 0xffffffff, &x158);
+ { uint32_t x161; uint32_t x160 = _mulx_u32(x148, 0xffffffff, &x161);
+ { uint32_t x163, uint8_t x164 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x148, 0x3);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(0x0, x152, x154, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x155, x157, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x158, x160, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x161, x163, &x175);
+ { uint8_t x177 = (x176 + x164);
+ { uint32_t _; uint8_t x180 = _addcarryx_u32(0x0, x130, x151, &_);
+ { uint32_t x182; uint8_t x183 = _addcarryx_u32(x180, x133, x166, &x182);
+ { uint32_t x185; uint8_t x186 = _addcarryx_u32(x183, x136, x169, &x185);
+ { uint32_t x188; uint8_t x189 = _addcarryx_u32(x186, x139, x172, &x188);
+ { uint32_t x191; uint8_t x192 = _addcarryx_u32(x189, x142, x175, &x191);
+ { uint32_t x194; uint8_t x195 = _addcarryx_u32(x192, x145, x177, &x194);
+ { uint8_t x196 = (x195 + x146);
+ { uint32_t x199; uint32_t x198 = _mulx_u32(x9, x13, &x199);
+ { uint32_t x202; uint32_t x201 = _mulx_u32(x9, x15, &x202);
+ { uint32_t x205; uint32_t x204 = _mulx_u32(x9, x17, &x205);
+ { uint32_t x208; uint32_t x207 = _mulx_u32(x9, x19, &x208);
+ { uint32_t x211; uint32_t x210 = _mulx_u32(x9, x18, &x211);
+ { uint32_t x213; uint8_t x214 = _addcarryx_u32(0x0, x199, x201, &x213);
+ { uint32_t x216; uint8_t x217 = _addcarryx_u32(x214, x202, x204, &x216);
+ { uint32_t x219; uint8_t x220 = _addcarryx_u32(x217, x205, x207, &x219);
+ { uint32_t x222; uint8_t x223 = _addcarryx_u32(x220, x208, x210, &x222);
+ { uint32_t x225; uint8_t _ = _addcarryx_u32(0x0, x223, x211, &x225);
+ { uint32_t x228; uint8_t x229 = _addcarryx_u32(0x0, x182, x198, &x228);
+ { uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x185, x213, &x231);
+ { uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x188, x216, &x234);
+ { uint32_t x237; uint8_t x238 = _addcarryx_u32(x235, x191, x219, &x237);
+ { uint32_t x240; uint8_t x241 = _addcarryx_u32(x238, x194, x222, &x240);
+ { uint32_t x243; uint8_t x244 = _addcarryx_u32(x241, x196, x225, &x243);
+ { uint32_t _; uint32_t x246 = _mulx_u32(x228, 0xcccccccd, &_);
+ { uint32_t x250; uint32_t x249 = _mulx_u32(x246, 0xfffffffb, &x250);
+ { uint32_t x253; uint32_t x252 = _mulx_u32(x246, 0xffffffff, &x253);
+ { uint32_t x256; uint32_t x255 = _mulx_u32(x246, 0xffffffff, &x256);
+ { uint32_t x259; uint32_t x258 = _mulx_u32(x246, 0xffffffff, &x259);
+ { uint32_t x261, uint8_t x262 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x246, 0x3);
+ { uint32_t x264; uint8_t x265 = _addcarryx_u32(0x0, x250, x252, &x264);
+ { uint32_t x267; uint8_t x268 = _addcarryx_u32(x265, x253, x255, &x267);
+ { uint32_t x270; uint8_t x271 = _addcarryx_u32(x268, x256, x258, &x270);
+ { uint32_t x273; uint8_t x274 = _addcarryx_u32(x271, x259, x261, &x273);
+ { uint8_t x275 = (x274 + x262);
+ { uint32_t _; uint8_t x278 = _addcarryx_u32(0x0, x228, x249, &_);
+ { uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x231, x264, &x280);
+ { uint32_t x283; uint8_t x284 = _addcarryx_u32(x281, x234, x267, &x283);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x237, x270, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x240, x273, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x243, x275, &x292);
+ { uint8_t x294 = (x293 + x244);
+ { uint32_t x297; uint32_t x296 = _mulx_u32(x11, x13, &x297);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x11, x15, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x17, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x19, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x18, &x309);
+ { uint32_t x311; uint8_t x312 = _addcarryx_u32(0x0, x297, x299, &x311);
+ { uint32_t x314; uint8_t x315 = _addcarryx_u32(x312, x300, x302, &x314);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t _ = _addcarryx_u32(0x0, x321, x309, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(0x0, x280, x296, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x283, x311, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(x330, x286, x314, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x294, x323, &x341);
+ { uint32_t _; uint32_t x344 = _mulx_u32(x326, 0xcccccccd, &_);
+ { uint32_t x348; uint32_t x347 = _mulx_u32(x344, 0xfffffffb, &x348);
+ { uint32_t x351; uint32_t x350 = _mulx_u32(x344, 0xffffffff, &x351);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x344, 0xffffffff, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x344, 0xffffffff, &x357);
+ { uint32_t x359, uint8_t x360 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x344, 0x3);
+ { uint32_t x362; uint8_t x363 = _addcarryx_u32(0x0, x348, x350, &x362);
+ { uint32_t x365; uint8_t x366 = _addcarryx_u32(x363, x351, x353, &x365);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(x366, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint8_t x373 = (x372 + x360);
+ { uint32_t _; uint8_t x376 = _addcarryx_u32(0x0, x326, x347, &_);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x329, x362, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x332, x365, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x335, x368, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x338, x371, &x387);
+ { uint32_t x390; uint8_t x391 = _addcarryx_u32(x388, x341, x373, &x390);
+ { uint8_t x392 = (x391 + x342);
+ { uint32_t x395; uint32_t x394 = _mulx_u32(x10, x13, &x395);
+ { uint32_t x398; uint32_t x397 = _mulx_u32(x10, x15, &x398);
+ { uint32_t x401; uint32_t x400 = _mulx_u32(x10, x17, &x401);
+ { uint32_t x404; uint32_t x403 = _mulx_u32(x10, x19, &x404);
+ { uint32_t x407; uint32_t x406 = _mulx_u32(x10, x18, &x407);
+ { uint32_t x409; uint8_t x410 = _addcarryx_u32(0x0, x395, x397, &x409);
+ { uint32_t x412; uint8_t x413 = _addcarryx_u32(x410, x398, x400, &x412);
+ { uint32_t x415; uint8_t x416 = _addcarryx_u32(x413, x401, x403, &x415);
+ { uint32_t x418; uint8_t x419 = _addcarryx_u32(x416, x404, x406, &x418);
+ { uint32_t x421; uint8_t _ = _addcarryx_u32(0x0, x419, x407, &x421);
+ { uint32_t x424; uint8_t x425 = _addcarryx_u32(0x0, x378, x394, &x424);
+ { uint32_t x427; uint8_t x428 = _addcarryx_u32(x425, x381, x409, &x427);
+ { uint32_t x430; uint8_t x431 = _addcarryx_u32(x428, x384, x412, &x430);
+ { uint32_t x433; uint8_t x434 = _addcarryx_u32(x431, x387, x415, &x433);
+ { uint32_t x436; uint8_t x437 = _addcarryx_u32(x434, x390, x418, &x436);
+ { uint32_t x439; uint8_t x440 = _addcarryx_u32(x437, x392, x421, &x439);
+ { uint32_t _; uint32_t x442 = _mulx_u32(x424, 0xcccccccd, &_);
+ { uint32_t x446; uint32_t x445 = _mulx_u32(x442, 0xfffffffb, &x446);
+ { uint32_t x449; uint32_t x448 = _mulx_u32(x442, 0xffffffff, &x449);
+ { uint32_t x452; uint32_t x451 = _mulx_u32(x442, 0xffffffff, &x452);
+ { uint32_t x455; uint32_t x454 = _mulx_u32(x442, 0xffffffff, &x455);
+ { uint32_t x457, uint8_t x458 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x442, 0x3);
+ { uint32_t x460; uint8_t x461 = _addcarryx_u32(0x0, x446, x448, &x460);
+ { uint32_t x463; uint8_t x464 = _addcarryx_u32(x461, x449, x451, &x463);
+ { uint32_t x466; uint8_t x467 = _addcarryx_u32(x464, x452, x454, &x466);
+ { uint32_t x469; uint8_t x470 = _addcarryx_u32(x467, x455, x457, &x469);
+ { uint8_t x471 = (x470 + x458);
+ { uint32_t _; uint8_t x474 = _addcarryx_u32(0x0, x424, x445, &_);
+ { uint32_t x476; uint8_t x477 = _addcarryx_u32(x474, x427, x460, &x476);
+ { uint32_t x479; uint8_t x480 = _addcarryx_u32(x477, x430, x463, &x479);
+ { uint32_t x482; uint8_t x483 = _addcarryx_u32(x480, x433, x466, &x482);
+ { uint32_t x485; uint8_t x486 = _addcarryx_u32(x483, x436, x469, &x485);
+ { uint32_t x488; uint8_t x489 = _addcarryx_u32(x486, x439, x471, &x488);
+ { uint8_t x490 = (x489 + x440);
+ { uint32_t x492; uint8_t x493 = _subborrow_u32(0x0, x476, 0xfffffffb, &x492);
+ { uint32_t x495; uint8_t x496 = _subborrow_u32(x493, x479, 0xffffffff, &x495);
+ { uint32_t x498; uint8_t x499 = _subborrow_u32(x496, x482, 0xffffffff, &x498);
+ { uint32_t x501; uint8_t x502 = _subborrow_u32(x499, x485, 0xffffffff, &x501);
+ { uint32_t x504; uint8_t x505 = _subborrow_u32(x502, x488, 0x3, &x504);
+ { uint32_t _; uint8_t x508 = _subborrow_u32(x505, x490, 0x0, &_);
+ { uint32_t x509 = cmovznz(x508, x504, x488);
+ { uint32_t x510 = cmovznz(x508, x501, x485);
+ { uint32_t x511 = cmovznz(x508, x498, x482);
+ { uint32_t x512 = cmovznz(x508, x495, x479);
+ { uint32_t x513 = cmovznz(x508, x492, x476);
+ out[0] = x513;
+ out[1] = x512;
+ out[2] = x511;
+ out[3] = x510;
+ out[4] = x509;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e130m5/fenz.c b/src/Specific/montgomery32_2e130m5/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e130m5/fenz.c
+++ b/src/Specific/montgomery32_2e130m5/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) { /* fenz: writes to out[0] the bitwise OR of the five words in1[0..4]; out[0] is therefore 0 exactly when every input word is 0. */
+ { const uint32_t x7 = in1[4]; /* NOTE(review): `ReturnType` in the signature is not defined in this file's visible text -- presumably a macro or a code-printer annotation; confirm it compiles. */
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* all five input words are now in locals; nothing else is read from in1 */
+ { uint32_t x9 = (x8 | x7); /* start folding: in1[3] | in1[4] */
+ { uint32_t x10 = (x6 | x9); /* ... | in1[2] */
+ { uint32_t x11 = (x4 | x10); /* ... | in1[1] */
+ { uint32_t x12 = (x2 | x11); /* ... | in1[0]: OR of all five words */
+ out[0] = x12; /* single output word; any set bit in any input word shows up here */
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e130m5/feopp.c b/src/Specific/montgomery32_2e130m5/feopp.c
index 3b8d9927a..2feb96b86 100644
--- a/src/Specific/montgomery32_2e130m5/feopp.c
+++ b/src/Specific/montgomery32_2e130m5/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xfffffffb);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint8_t x41 = ((uint8_t)x24 & 0x3);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) { /* feopp: subtracts the five-word value in1 from zero with a borrow chain, then adds back a constant masked by the final borrow; writes five words to out. */
+ { const uint32_t x7 = in1[4]; /* in1[4] is consumed last in the borrow chain below */
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* in1[0] is consumed first in the borrow chain (borrow-in 0x0) */
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); /* word 0 of (0 - in1); x11 is the borrow-out, assuming the Intel intrinsic contract (helper is not defined in this chunk) */
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13); /* word 1, borrow propagated from word 0 */
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16); /* word 2 */
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19); /* word 3 */
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); /* word 4; x23 is the borrow out of the whole five-word subtraction */
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); /* NOTE(review): cmovznz is defined elsewhere; presumably yields 0xffffffff when x23 != 0 and 0x0 otherwise (an all-ones mask iff the subtraction borrowed) -- confirm */
+ { uint32_t x25 = (x24 & 0xfffffffb); /* the masked constants 0xfffffffb, 0xffffffff (three times), 0x3 are, lowest word first, the 32-bit words of 2^130 - 5 */
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27); /* start of the carry chain adding the masked constant to the difference; word 0 */
+ { uint32_t x29 = (x24 & 0xffffffff); /* AND with 0xffffffff leaves x24 unchanged */
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31); /* word 1 */
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35); /* word 2 */
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39); /* word 3 */
+ { uint8_t x41 = ((uint8_t)x24 & 0x3); /* top constant word is 0x3, so the masked value fits in a uint8_t */
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); /* word 4; the final carry is bound to `_` and never read */
+ out[0] = x27; /* outputs are stored in chain order: out[0] is the first word produced, out[4] the last */
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e130m5/fesub.c b/src/Specific/montgomery32_2e130m5/fesub.c
index 7abdaafc0..a5c5f00d5 100644
--- a/src/Specific/montgomery32_2e130m5/fesub.c
+++ b/src/Specific/montgomery32_2e130m5/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xfffffffb);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint8_t x52 = ((uint8_t)x35 & 0x3);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* fesub: computes in1 - in2 over five 32-bit words with a borrow chain, then adds back a constant masked by the final borrow; writes five words to out. */
+ { const uint32_t x10 = in1[4]; /* in1[4] / in2[4] are consumed last in the borrow chain below */
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0]; /* in1[0] / in2[0] are consumed first (borrow-in 0x0) */
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); /* word 0 of in1 - in2; x22 is the borrow-out, assuming the Intel intrinsic contract (helper is not defined in this chunk) */
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24); /* word 1, borrow propagated from word 0 */
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27); /* word 2 */
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30); /* word 3 */
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); /* word 4; x34 is the borrow out of the whole five-word subtraction */
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); /* NOTE(review): cmovznz is defined elsewhere; presumably yields 0xffffffff when x34 != 0 and 0x0 otherwise (an all-ones mask iff the subtraction borrowed) -- confirm */
+ { uint32_t x36 = (x35 & 0xfffffffb); /* the masked constants 0xfffffffb, 0xffffffff (three times), 0x3 are, lowest word first, the 32-bit words of 2^130 - 5 */
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); /* start of the carry chain adding the masked constant to the difference; word 0 */
+ { uint32_t x40 = (x35 & 0xffffffff); /* AND with 0xffffffff leaves x35 unchanged */
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42); /* word 1 */
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46); /* word 2 */
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50); /* word 3 */
+ { uint8_t x52 = ((uint8_t)x35 & 0x3); /* top constant word is 0x3, so the masked value fits in a uint8_t */
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); /* word 4; the final carry is bound to `_` and never read */
+ out[0] = x38; /* outputs are stored in chain order: out[0] is the first word produced, out[4] the last */
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e137m13/feadd.c b/src/Specific/montgomery32_2e137m13/feadd.c
index ad954bac8..cae1d30a9 100644
--- a/src/Specific/montgomery32_2e137m13/feadd.c
+++ b/src/Specific/montgomery32_2e137m13/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff3, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1ff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* feadd: adds in1 and in2 over five 32-bit words with a carry chain, trial-subtracts a fixed five-word constant, and selects between the raw sum and the reduced value; writes five words to out. */
+ { const uint32_t x10 = in1[4]; /* in1[4] / in2[4] are consumed last in the carry chain below */
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0]; /* in1[0] / in2[0] are consumed first (carry-in 0x0) */
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); /* word 0 of in1 + in2; x22 is the carry-out, assuming the Intel intrinsic contract (helper is not defined in this chunk) */
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24); /* word 1, carry propagated from word 0 */
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27); /* word 2 */
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30); /* word 3 */
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); /* word 4; x34 is the carry out of the five-word sum */
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff3, &x36); /* trial subtraction: the constants 0xfffffff3, 0xffffffff (three times), 0x1ff are, lowest word first, the 32-bit words of 2^137 - 13 */
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39); /* word 1 */
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42); /* word 2 */
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45); /* word 3 */
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1ff, &x48); /* word 4 */
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); /* extends the borrow chain through the sum's carry word x34; only the final borrow x52 is kept, the difference word `_` is never read */
+ { uint32_t x53 = cmovznz(x52, x48, x33); /* NOTE(review): cmovznz is defined elsewhere; presumably returns the third argument (raw sum word) when x52 != 0 and the second (reduced word) otherwise -- confirm */
+ { uint32_t x54 = cmovznz(x52, x45, x30); /* same select for word 3 */
+ { uint32_t x55 = cmovznz(x52, x42, x27); /* word 2 */
+ { uint32_t x56 = cmovznz(x52, x39, x24); /* word 1 */
+ { uint32_t x57 = cmovznz(x52, x36, x21); /* word 0 */
+ out[0] = x57; /* out[0] receives the word derived from in1[0] + in2[0]; out[4] the word derived from in1[4] + in2[4] */
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e137m13/femul.c b/src/Specific/montgomery32_2e137m13/femul.c
index 5cbadc344..fd1e9b76a 100644
--- a/src/Specific/montgomery32_2e137m13/femul.c
+++ b/src/Specific/montgomery32_2e137m13/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xc4ec4ec5, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff3, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x1ff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xc4ec4ec5, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff3, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x1ff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xc4ec4ec5, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff3, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x1ff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xc4ec4ec5, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff3, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x1ff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xc4ec4ec5, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff3, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x1ff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff3, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x1ff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xc4ec4ec5, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff3, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x1ff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xc4ec4ec5, &_);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff3, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x1ff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xc4ec4ec5, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff3, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x1ff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xc4ec4ec5, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff3, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x1ff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xc4ec4ec5, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff3, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x1ff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff3, &x502);
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x1ff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+ { uint32_t x519 = cmovznz(x518, x514, x498);
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e137m13/fenz.c b/src/Specific/montgomery32_2e137m13/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e137m13/fenz.c
+++ b/src/Specific/montgomery32_2e137m13/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7);
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11);
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e137m13/feopp.c b/src/Specific/montgomery32_2e137m13/feopp.c
index 9c760b607..b0be8f604 100644
--- a/src/Specific/montgomery32_2e137m13/feopp.c
+++ b/src/Specific/montgomery32_2e137m13/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xfffffff3);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0x1ff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
+ { uint32_t x25 = (x24 & 0xfffffff3);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint32_t x41 = (x24 & 0x1ff);
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e137m13/fesub.c b/src/Specific/montgomery32_2e137m13/fesub.c
index f59e7ee23..deafb3894 100644
--- a/src/Specific/montgomery32_2e137m13/fesub.c
+++ b/src/Specific/montgomery32_2e137m13/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xfffffff3);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0x1ff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
+ { uint32_t x36 = (x35 & 0xfffffff3);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint32_t x52 = (x35 & 0x1ff);
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
+ out[0] = x38;
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e140m27/feadd.c b/src/Specific/montgomery32_2e140m27/feadd.c
index cd37bdb82..3641d2c2c 100644
--- a/src/Specific/montgomery32_2e140m27/feadd.c
+++ b/src/Specific/montgomery32_2e140m27/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffe5, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0xfff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffe5, &x36);
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0xfff, &x48);
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
+ { uint32_t x53 = cmovznz(x52, x48, x33);
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57;
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e140m27/femul.c b/src/Specific/montgomery32_2e140m27/femul.c
index 383adf779..cfb98b534 100644
--- a/src/Specific/montgomery32_2e140m27/femul.c
+++ b/src/Specific/montgomery32_2e140m27/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0x684bda13, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffe5, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0xfff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0x684bda13, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xffffffe5, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0xfff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0x684bda13, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xffffffe5, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0xfff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0x684bda13, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xffffffe5, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0xfff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0x684bda13, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xffffffe5, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0xfff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xffffffe5, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0xfff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0x684bda13, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffe5, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0xfff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0x684bda13, &_);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xffffffe5, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0xfff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0x684bda13, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xffffffe5, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0xfff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0x684bda13, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xffffffe5, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0xfff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0x684bda13, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xffffffe5, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0xfff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xffffffe5, &x502);
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0xfff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+ { uint32_t x519 = cmovznz(x518, x514, x498);
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e140m27/fenz.c b/src/Specific/montgomery32_2e140m27/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e140m27/fenz.c
+++ b/src/Specific/montgomery32_2e140m27/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7);
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11);
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e140m27/feopp.c b/src/Specific/montgomery32_2e140m27/feopp.c
index 0044def60..c9cb1b8e7 100644
--- a/src/Specific/montgomery32_2e140m27/feopp.c
+++ b/src/Specific/montgomery32_2e140m27/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xffffffe5);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0xfff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
+ { uint32_t x25 = (x24 & 0xffffffe5);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint32_t x41 = (x24 & 0xfff);
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e140m27/fesub.c b/src/Specific/montgomery32_2e140m27/fesub.c
index e3d1e0f7b..c8ffc90c5 100644
--- a/src/Specific/montgomery32_2e140m27/fesub.c
+++ b/src/Specific/montgomery32_2e140m27/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xffffffe5);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0xfff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
+ { uint32_t x36 = (x35 & 0xffffffe5);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint32_t x52 = (x35 & 0xfff);
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
+ out[0] = x38;
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e141m9/feadd.c b/src/Specific/montgomery32_2e141m9/feadd.c
index 96afc57a3..d3b03cb4c 100644
--- a/src/Specific/montgomery32_2e141m9/feadd.c
+++ b/src/Specific/montgomery32_2e141m9/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff7, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1fff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff7, &x36);
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1fff, &x48);
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
+ { uint32_t x53 = cmovznz(x52, x48, x33);
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57;
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e141m9/femul.c b/src/Specific/montgomery32_2e141m9/femul.c
index 1b4a3b2c0..cdf89d1fa 100644
--- a/src/Specific/montgomery32_2e141m9/femul.c
+++ b/src/Specific/montgomery32_2e141m9/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0x38e38e39, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff7, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x1fff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0x38e38e39, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff7, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x1fff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0x38e38e39, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff7, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x1fff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0x38e38e39, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff7, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x1fff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0x38e38e39, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff7, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x1fff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff7, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x1fff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0x38e38e39, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff7, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x1fff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0x38e38e39, &_);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff7, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x1fff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0x38e38e39, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff7, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x1fff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0x38e38e39, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff7, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x1fff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0x38e38e39, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff7, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x1fff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff7, &x502);
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x1fff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+ { uint32_t x519 = cmovznz(x518, x514, x498);
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e141m9/fenz.c b/src/Specific/montgomery32_2e141m9/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e141m9/fenz.c
+++ b/src/Specific/montgomery32_2e141m9/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7);
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11);
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e141m9/feopp.c b/src/Specific/montgomery32_2e141m9/feopp.c
index be4e2fff3..a9badfe08 100644
--- a/src/Specific/montgomery32_2e141m9/feopp.c
+++ b/src/Specific/montgomery32_2e141m9/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xfffffff7);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0x1fff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
+ { uint32_t x25 = (x24 & 0xfffffff7);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint32_t x41 = (x24 & 0x1fff);
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e141m9/fesub.c b/src/Specific/montgomery32_2e141m9/fesub.c
index 09a0effa4..162164a3e 100644
--- a/src/Specific/montgomery32_2e141m9/fesub.c
+++ b/src/Specific/montgomery32_2e141m9/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xfffffff7);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0x1fff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { // Writes to out the 5-limb value (in1 - in2), with a 5-limb constant conditionally added back; limb 0 enters the borrow/carry chains first.
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // start of the borrow chain computing in1 - in2, limb by limb
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // x34 is the borrow out of the top limb
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask derived from the final borrow; cmovznz is defined outside this file - presumably yields 0 when x34 == 0 and 0xffffffff otherwise (confirm)
+ { uint32_t x36 = (x35 & 0xfffffff7); // the masked constants 0xfffffff7, 0xffffffff (x3), 0x1fff are the 32-bit limbs of 2^141 - 9
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); // start of the carry chain adding the masked constant to the difference
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint32_t x52 = (x35 & 0x1fff);
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // carry out of the top limb is stored in _ and never read
+ out[0] = x38; // results are stored in the same limb order the inputs were read in (index 0 = first limb of the chain)
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m3/feadd.c b/src/Specific/montgomery32_2e150m3/feadd.c
index 6d48d8e63..4315fde8b 100644
--- a/src/Specific/montgomery32_2e150m3/feadd.c
+++ b/src/Specific/montgomery32_2e150m3/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffd, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { // Writes to out either the 5-limb sum in1 + in2 or that sum minus a 5-limb constant, chosen per limb by cmovznz on the final borrow.
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // start of the carry chain computing in1 + in2, limb by limb
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // x34 is the carry out of the top limb
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffd, &x36); // trial subtraction: the constants 0xfffffffd, 0xffffffff (x3), 0x3fffff are the 32-bit limbs of 2^150 - 3
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffff, &x48);
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // extends the borrow chain through the carry x34; only the borrow x52 is kept, the difference word is discarded
+ { uint32_t x53 = cmovznz(x52, x48, x33); // cmovznz is defined outside this file - presumably returns the 2nd argument when x52 == 0 and the 3rd otherwise (confirm)
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57; // x57 is the selection for limb 0 (x36/x21), so output order matches input limb order
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m3/femul.c b/src/Specific/montgomery32_2e150m3/femul.c
index c3749a6dd..e9d0ccd9d 100644
--- a/src/Specific/montgomery32_2e150m3/femul.c
+++ b/src/Specific/montgomery32_2e150m3/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xaaaaaaab, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffd, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xaaaaaaab, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffffd, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xaaaaaaab, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffffd, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xaaaaaaab, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffffd, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xaaaaaaab, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffffd, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffffd, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { // Word-by-word multiply-and-reduce of two 5-limb values: five rounds (one per limb of in1), each interleaving a partial product with a reduction step, followed by one trial subtraction.
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1: in1[0] times every limb of in2; _mulx_u32 presumably returns the low word and stores the high word through the pointer (confirm against its definition)
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // carry chain combining each product's pointer-output word with the next product's returned word
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xaaaaaaab, &_); // reduction factor: 0xaaaaaaab * 3 == 1 (mod 2^32); the pointer-output word is discarded
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffd, &x55); // x51 times the constant limbs 0xfffffffd, 0xffffffff (x3), 0x3fffff, i.e. the limbs of 2^150 - 3
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_); // lowest sum word is discarded, only its carry x85 is kept; the remaining words form the accumulator for the next round
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103); // round 2: in1[1] times every limb of in2
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132); // add this round's partial product to the accumulator from the previous round
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xaaaaaaab, &_); // reduction factor for round 2, same constant as round 1
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffffd, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148); // the two top carries of this round are summed and fed into the next round's top-limb addition
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203); // round 3: in1[2] times every limb of in2
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xaaaaaaab, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffffd, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303); // round 4: in1[3] times every limb of in2
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xaaaaaaab, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffffd, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403); // round 5: in1[4] times every limb of in2
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xaaaaaaab, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffffd, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffffd, &x502); // trial subtraction of the constant limbs (2^150 - 3) from the accumulator x486..x498
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_); // extends the borrow chain through the carry word x500; only the borrow x518 is kept
+ { uint32_t x519 = cmovznz(x518, x514, x498); // cmovznz is defined outside this file - presumably returns the 2nd argument when x518 == 0 and the 3rd otherwise (confirm)
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523; // x523 is the selection for the lowest accumulator word (x502/x486)
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m3/fenz.c b/src/Specific/montgomery32_2e150m3/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e150m3/fenz.c
+++ b/src/Specific/montgomery32_2e150m3/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) { /* Writes the bitwise OR of the five words in1[0..4] to out[0]; out[0] is zero exactly when every input word is zero. */
+ { const uint32_t x7 = in1[4]; /* NOTE(review): `ReturnType` in the signature is not defined anywhere in the visible text; presumably a macro or generator annotation supplied elsewhere -- confirm it expands to nothing. */
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7); /* start the OR accumulation with in1[3] | in1[4] */
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11); /* x12 now holds the OR of all five input words */
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m3/feopp.c b/src/Specific/montgomery32_2e150m3/feopp.c
index d4dec9d6d..9a5050270 100644
--- a/src/Specific/montgomery32_2e150m3/feopp.c
+++ b/src/Specific/montgomery32_2e150m3/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xfffffffd);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0x3fffff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) { /* Computes 0 - in1 over five 32-bit words (in1[0] is the least-significant word), then adds a masked copy of the constant 2^150 - 3; result words are stored to out[0..4], least-significant first. */
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); /* borrow chain for 0 - in1; assumes the Intel-intrinsic argument order (borrow_in, minuend, subtrahend, &difference) -- confirm against the definition in use */
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); /* x23 is the borrow out of the most-significant word */
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); /* mask derived from the final borrow; cmovznz is defined outside this view -- presumably yields 0xffffffff when x23 is nonzero and 0 otherwise, TODO confirm */
+ { uint32_t x25 = (x24 & 0xfffffffd); /* masked least-significant word of the constant; the five masked words 0xfffffffd, 0xffffffff, 0xffffffff, 0xffffffff, 0x3fffff together encode 2^150 - 3 */
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27); /* carry chain adding the masked constant to the difference, word by word */
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint32_t x41 = (x24 & 0x3fffff); /* masked most-significant word of the constant (22 bits) */
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); /* the carry out of the top word is bound to `_` and never read */
+ out[0] = x27; /* outputs are written in the same word order as the inputs are read: index 0 is the word produced first in the carry chain */
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m3/fesub.c b/src/Specific/montgomery32_2e150m3/fesub.c
index 7039f7ecd..f62237483 100644
--- a/src/Specific/montgomery32_2e150m3/fesub.c
+++ b/src/Specific/montgomery32_2e150m3/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xfffffffd);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0x3fffff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* Computes in1 - in2 over five 32-bit words (index 0 is the least-significant word), then adds a masked copy of the constant 2^150 - 3; result words are stored to out[0..4], least-significant first. */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); /* borrow chain for in1 - in2; assumes the Intel-intrinsic argument order (borrow_in, minuend, subtrahend, &difference) -- confirm against the definition in use */
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); /* x34 is the borrow out of the most-significant word */
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); /* mask derived from the final borrow; cmovznz is defined outside this view -- presumably yields 0xffffffff when x34 is nonzero and 0 otherwise, TODO confirm */
+ { uint32_t x36 = (x35 & 0xfffffffd); /* masked least-significant word of the constant; the five masked words 0xfffffffd, 0xffffffff, 0xffffffff, 0xffffffff, 0x3fffff together encode 2^150 - 3 */
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); /* carry chain adding the masked constant to the difference, word by word */
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint32_t x52 = (x35 & 0x3fffff); /* masked most-significant word of the constant (22 bits) */
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); /* the carry out of the top word is bound to `_` and never read */
+ out[0] = x38; /* outputs are written in the same word order as the inputs are read: index 0 is the word produced first in the carry chain */
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m5/feadd.c b/src/Specific/montgomery32_2e150m5/feadd.c
index f0c527d11..d39f224e8 100644
--- a/src/Specific/montgomery32_2e150m5/feadd.c
+++ b/src/Specific/montgomery32_2e150m5/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffb, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* Computes in1 + in2 over five 32-bit words (index 0 is the least-significant word), trial-subtracts the constant 2^150 - 5, and stores one of the two candidates per word to out[0..4], least-significant first. */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); /* carry chain for in1 + in2; assumes the Intel-intrinsic argument order (carry_in, a, b, &sum) -- confirm against the definition in use */
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); /* x34 is the carry out of the most-significant word */
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffb, &x36); /* borrow chain subtracting the words 0xfffffffb, 0xffffffff, 0xffffffff, 0xffffffff, 0x3fffff, which together encode 2^150 - 5 */
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffff, &x48);
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); /* extends the borrow chain through the addition's carry-out x34; only the final borrow x52 is kept, the difference word is discarded into `_` */
+ { uint32_t x53 = cmovznz(x52, x48, x33); /* per-word choice between the subtracted and unsubtracted sum, keyed on x52; cmovznz is defined outside this view -- presumably returns the third argument when x52 is nonzero, TODO confirm */
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57; /* x57 is selected from the least-significant candidates (x36 / x21), so out[] uses the same word order as in1[] and in2[] */
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m5/femul.c b/src/Specific/montgomery32_2e150m5/femul.c
index b0134aeff..19902bde1 100644
--- a/src/Specific/montgomery32_2e150m5/femul.c
+++ b/src/Specific/montgomery32_2e150m5/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xcccccccd, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffb, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xcccccccd, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffffb, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xcccccccd, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffffb, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xcccccccd, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffffb, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xcccccccd, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffffb, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffffb, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xcccccccd, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffb, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xcccccccd, &_);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffffb, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xcccccccd, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffffb, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xcccccccd, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffffb, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xcccccccd, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffffb, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffffb, &x502);
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+ { uint32_t x519 = cmovznz(x518, x514, x498);
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e150m5/fenz.c b/src/Specific/montgomery32_2e150m5/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e150m5/fenz.c
+++ b/src/Specific/montgomery32_2e150m5/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) { /* fenz: writes to out[0] the bitwise OR of in1[0..4], so out[0] is zero exactly when every input word is zero. NOTE(review): ReturnType is not defined in this hunk; presumably a macro supplied by the including file -- confirm. */
+ { const uint32_t x7 = in1[4]; /* copy the five input words into locals, highest index first */
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7); /* start the OR fold with in1[3] | in1[4] */
+ { uint32_t x10 = (x6 | x9); /* fold in in1[2] */
+ { uint32_t x11 = (x4 | x10); /* fold in in1[1] */
+ { uint32_t x12 = (x2 | x11); /* fold in in1[0]; x12 is now the OR of all five words */
+ out[0] = x12; /* the only element of out that is written */
+ }}}}}}}}} /* closes the nine nested single-declaration scopes opened above */
+}
diff --git a/src/Specific/montgomery32_2e150m5/feopp.c b/src/Specific/montgomery32_2e150m5/feopp.c
index e4f714108..0484af931 100644
--- a/src/Specific/montgomery32_2e150m5/feopp.c
+++ b/src/Specific/montgomery32_2e150m5/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xfffffffb);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0x3fffff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) { /* feopp: computes 0 - in1 over five 32-bit limbs (index 0 least significant), then adds a mask-selected copy of the constant whose limbs are 0xfffffffb, 0xffffffff, 0xffffffff, 0xffffffff, 0x3fffff (= 2^150 - 5, matching the montgomery32_2e150m5 path); presumably field negation modulo that value -- confirm. */
+ { const uint32_t x7 = in1[4]; /* copy the five input limbs into locals, highest index first */
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); /* limb 0 of 0 - in1; x11 is the borrow passed to the next limb */
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13); /* limb 1, consuming borrow x11 */
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16); /* limb 2 */
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19); /* limb 3 */
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); /* limb 4; x23 is the borrow out of the top limb */
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); /* mask chosen from x23; cmovznz is defined outside this hunk -- presumably yields 0x0 when x23 is zero and 0xffffffff otherwise, confirm */
+ { uint32_t x25 = (x24 & 0xfffffffb); /* masked limb 0 of the constant */
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27); /* add masked constant to the difference, limb 0; x28 carries into limb 1 */
+ { uint32_t x29 = (x24 & 0xffffffff); /* masked limb 1 of the constant */
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31); /* limb 1 */
+ { uint32_t x33 = (x24 & 0xffffffff); /* masked limb 2 of the constant */
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35); /* limb 2 */
+ { uint32_t x37 = (x24 & 0xffffffff); /* masked limb 3 of the constant */
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39); /* limb 3 */
+ { uint32_t x41 = (x24 & 0x3fffff); /* masked limb 4 (top limb) of the constant */
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); /* limb 4; the carry out of the top limb lands in _ and is never read */
+ out[0] = x27; /* results are stored least-significant limb first */
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}} /* closes the nested single-declaration scopes opened above */
+}
diff --git a/src/Specific/montgomery32_2e150m5/fesub.c b/src/Specific/montgomery32_2e150m5/fesub.c
index 4236fbd26..f3fd6956d 100644
--- a/src/Specific/montgomery32_2e150m5/fesub.c
+++ b/src/Specific/montgomery32_2e150m5/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xfffffffb);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0x3fffff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* fesub: computes in1 - in2 over five 32-bit limbs (index 0 least significant), then adds a mask-selected copy of the constant whose limbs are 0xfffffffb, 0xffffffff, 0xffffffff, 0xffffffff, 0x3fffff (= 2^150 - 5, matching the montgomery32_2e150m5 path); presumably field subtraction modulo that value -- confirm. */
+ { const uint32_t x10 = in1[4]; /* copy the limbs of in1 into locals, highest index first */
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4]; /* copy the limbs of in2 into locals, highest index first */
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); /* limb 0 of in1 - in2; x22 is the borrow passed to the next limb */
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24); /* limb 1, consuming borrow x22 */
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27); /* limb 2 */
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30); /* limb 3 */
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); /* limb 4; x34 is the borrow out of the top limb */
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); /* mask chosen from x34; cmovznz is defined outside this hunk -- presumably yields 0x0 when x34 is zero and 0xffffffff otherwise, confirm */
+ { uint32_t x36 = (x35 & 0xfffffffb); /* masked limb 0 of the constant */
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); /* add masked constant to the difference, limb 0; x39 carries into limb 1 */
+ { uint32_t x40 = (x35 & 0xffffffff); /* masked limb 1 of the constant */
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42); /* limb 1 */
+ { uint32_t x44 = (x35 & 0xffffffff); /* masked limb 2 of the constant */
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46); /* limb 2 */
+ { uint32_t x48 = (x35 & 0xffffffff); /* masked limb 3 of the constant */
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50); /* limb 3 */
+ { uint32_t x52 = (x35 & 0x3fffff); /* masked limb 4 (top limb) of the constant */
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); /* limb 4; the carry out of the top limb lands in _ and is never read */
+ out[0] = x38; /* results are stored least-significant limb first */
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}} /* closes the nested single-declaration scopes opened above */
+}
diff --git a/src/Specific/montgomery32_2e152m17/feadd.c b/src/Specific/montgomery32_2e152m17/feadd.c
index 92a4e0d5f..3d9b9281a 100644
--- a/src/Specific/montgomery32_2e152m17/feadd.c
+++ b/src/Specific/montgomery32_2e152m17/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffef, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0xffffff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* feadd: computes in1 + in2 over five 32-bit limbs (index 0 least significant), also computes that sum minus the constant whose limbs are 0xffffffef, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffff (= 2^152 - 17, matching the montgomery32_2e152m17 path), and stores one of the two per limb depending on the final borrow; presumably field addition modulo that value -- confirm. */
+ { const uint32_t x10 = in1[4]; /* copy the limbs of in1 into locals, highest index first */
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4]; /* copy the limbs of in2 into locals, highest index first */
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); /* limb 0 of in1 + in2; x22 is the carry passed to the next limb */
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24); /* limb 1, consuming carry x22 */
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27); /* limb 2 */
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30); /* limb 3 */
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); /* limb 4; x34 is the carry out of the top limb */
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffef, &x36); /* subtract limb 0 of the constant from the sum; x37 is the borrow passed on */
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39); /* limb 1 */
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42); /* limb 2 */
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45); /* limb 3 */
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0xffffff, &x48); /* limb 4 (top limb of the constant) */
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); /* extend the borrow chain through the addition's carry x34; the difference goes to _ and is never read, only the borrow x52 is kept */
+ { uint32_t x53 = cmovznz(x52, x48, x33); /* per-limb choice between the reduced value and the raw sum, keyed on x52; cmovznz is defined outside this hunk -- presumably yields its second argument when x52 is zero and its third otherwise, confirm */
+ { uint32_t x54 = cmovznz(x52, x45, x30); /* limb 3 */
+ { uint32_t x55 = cmovznz(x52, x42, x27); /* limb 2 */
+ { uint32_t x56 = cmovznz(x52, x39, x24); /* limb 1 */
+ { uint32_t x57 = cmovznz(x52, x36, x21); /* limb 0 */
+ out[0] = x57; /* results are stored least-significant limb first */
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}} /* closes the nested single-declaration scopes opened above */
+}
diff --git a/src/Specific/montgomery32_2e152m17/femul.c b/src/Specific/montgomery32_2e152m17/femul.c
index 6b1678812..2e18b2d18 100644
--- a/src/Specific/montgomery32_2e152m17/femul.c
+++ b/src/Specific/montgomery32_2e152m17/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xf0f0f0f1, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffef, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0xffffff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xf0f0f0f1, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xffffffef, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0xffffff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xf0f0f0f1, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xffffffef, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0xffffff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xf0f0f0f1, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xffffffef, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0xffffff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xf0f0f0f1, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xffffffef, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0xffffff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xffffffef, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0xffffff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xf0f0f0f1, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffef, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0xffffff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xf0f0f0f1, &_);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xffffffef, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0xffffff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xf0f0f0f1, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xffffffef, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0xffffff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xf0f0f0f1, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xffffffef, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0xffffff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xf0f0f0f1, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xffffffef, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0xffffff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xffffffef, &x502);
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0xffffff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+ { uint32_t x519 = cmovznz(x518, x514, x498);
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e152m17/fenz.c b/src/Specific/montgomery32_2e152m17/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e152m17/fenz.c
+++ b/src/Specific/montgomery32_2e152m17/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) { // Stores in out[0] the bitwise OR of all five words of in1, so out[0] == 0 exactly when every input word is 0. NOTE(review): `ReturnType` is not standard C; presumably a macro or generator annotation defined elsewhere -- confirm.
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7); // fold the words together one at a time, starting from in1[3] | in1[4]
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11); // x12 now holds the OR of in1[0..4]
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e152m17/feopp.c b/src/Specific/montgomery32_2e152m17/feopp.c
index 095aad6db..3d25ee50b 100644
--- a/src/Specific/montgomery32_2e152m17/feopp.c
+++ b/src/Specific/montgomery32_2e152m17/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xffffffef);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0xffffff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) { // out = 0 - in1 over five 32-bit words (word 0 least significant), then the masked constant 0x00ffffff_ffffffff_ffffffff_ffffffff_ffffffef (= 2^152 - 17) is added; presumably in1 is already below that constant -- TODO confirm.
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // 0 - in1[0]; x11 is the borrow handed to the next word
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); // x23 = borrow out of the top word
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // mask built from the final borrow; presumably 0 when x23 == 0 and 0xffffffff otherwise -- cmovznz is defined elsewhere, confirm
+ { uint32_t x25 = (x24 & 0xffffffef); // masked lowest word of the constant
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27); // add the masked constant word by word, carry chained through x28, x32, x36, x40
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint32_t x41 = (x24 & 0xffffff); // masked top word of the constant (24 bits)
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // carry out of the top word is discarded
+ out[0] = x27; // results are stored least-significant word first
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e152m17/fesub.c b/src/Specific/montgomery32_2e152m17/fesub.c
index bf1970d94..f3c334379 100644
--- a/src/Specific/montgomery32_2e152m17/fesub.c
+++ b/src/Specific/montgomery32_2e152m17/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xffffffef);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0xffffff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { // out = in1 - in2 over five 32-bit words (word 0 least significant), then the masked constant 0x00ffffff_ffffffff_ffffffff_ffffffff_ffffffef (= 2^152 - 17) is added; presumably both inputs are already below that constant -- TODO confirm.
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // in1[0] - in2[0]; x22 is the borrow handed to the next word
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // x34 = borrow out of the top word
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask built from the final borrow; presumably 0 when x34 == 0 and 0xffffffff otherwise -- cmovznz is defined elsewhere, confirm
+ { uint32_t x36 = (x35 & 0xffffffef); // masked lowest word of the constant
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); // add the masked constant word by word, carry chained through x39, x43, x47, x51
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint32_t x52 = (x35 & 0xffffff); // masked top word of the constant (24 bits)
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // carry out of the top word is discarded
+ out[0] = x38; // results are stored least-significant word first
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e158m15/feadd.c b/src/Specific/montgomery32_2e158m15/feadd.c
index c28d5c418..4830cea75 100644
--- a/src/Specific/montgomery32_2e158m15/feadd.c
+++ b/src/Specific/montgomery32_2e158m15/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
-{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff1, &x36);
-{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
-{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
-{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
-{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffffff, &x48);
-{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);
-{ uint32_t x53 = cmovznz(x52, x48, x33);
-{ uint32_t x54 = cmovznz(x52, x45, x30);
-{ uint32_t x55 = cmovznz(x52, x42, x27);
-{ uint32_t x56 = cmovznz(x52, x39, x24);
-{ uint32_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { // out = in1 + in2 over five 32-bit words (word 0 least significant), with one trial subtraction of the constant 0x3fffffff_ffffffff_ffffffff_ffffffff_fffffff1 (= 2^158 - 15); cmovznz then picks between the raw sum and the subtracted value.
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // in1[0] + in2[0]; x22 is the carry handed to the next word
+ { uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // x34 = carry out of the top word, treated below as a sixth word of the sum
+ { uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff1, &x36); // trial subtraction of the constant, lowest word first
+ { uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+ { uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+ { uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+ { uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffffff, &x48); // top word of the constant (30 bits)
+ { uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // only the borrow x52 is kept: it is set when the sum (including x34) is smaller than the constant
+ { uint32_t x53 = cmovznz(x52, x48, x33); // per-word select on x52; presumably yields the subtracted word when x52 == 0 and the raw sum word otherwise -- cmovznz is defined elsewhere, confirm
+ { uint32_t x54 = cmovznz(x52, x45, x30);
+ { uint32_t x55 = cmovznz(x52, x42, x27);
+ { uint32_t x56 = cmovznz(x52, x39, x24);
+ { uint32_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57; // results are stored least-significant word first
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e158m15/femul.c b/src/Specific/montgomery32_2e158m15/femul.c
index fd32df5b8..546c32f36 100644
--- a/src/Specific/montgomery32_2e158m15/femul.c
+++ b/src/Specific/montgomery32_2e158m15/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
-{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
-{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
-{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
-{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
-{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
-{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xeeeeeeef, &_);
-{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff1, &x55);
-{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
-{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
-{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
-{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffffff, &x67);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
-{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
-{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
-{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
-{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
-{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
-{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
-{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
-{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
-{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
-{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
-{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
-{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
-{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
-{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
-{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
-{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
-{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
-{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
-{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
-{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xeeeeeeef, &_);
-{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff1, &x154);
-{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
-{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
-{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
-{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffffff, &x166);
-{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
-{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
-{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
-{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
-{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
-{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
-{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
-{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
-{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
-{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
-{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
-{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
-{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
-{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
-{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
-{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
-{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
-{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
-{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
-{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
-{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xeeeeeeef, &_);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff1, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
-{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffffff, &x266);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
-{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
-{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
-{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
-{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xeeeeeeef, &_);
-{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff1, &x354);
-{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
-{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
-{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
-{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffffff, &x366);
-{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
-{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
-{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
-{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
-{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
-{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
-{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
-{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
-{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
-{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
-{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
-{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
-{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
-{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
-{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xeeeeeeef, &_);
-{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff1, &x454);
-{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
-{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
-{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
-{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffffff, &x466);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
-{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
-{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
-{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
-{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
-{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
-{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
-{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff1, &x502);
-{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
-{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
-{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
-{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffffff, &x514);
-{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
-{ uint32_t x519 = cmovznz(x518, x514, x498);
-{ uint32_t x520 = cmovznz(x518, x511, x495);
-{ uint32_t x521 = cmovznz(x518, x508, x492);
-{ uint32_t x522 = cmovznz(x518, x505, x489);
-{ uint32_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);
+ { uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+ { uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+ { uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+ { uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+ { uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+ { uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xeeeeeeef, &_);
+ { uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff1, &x55);
+ { uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+ { uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+ { uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+ { uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffffff, &x67);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+ { uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+ { uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+ { uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+ { uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);
+ { uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+ { uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+ { uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+ { uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+ { uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+ { uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+ { uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+ { uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+ { uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+ { uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);
+ { uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+ { uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+ { uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+ { uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+ { uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+ { uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xeeeeeeef, &_);
+ { uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff1, &x154);
+ { uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+ { uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+ { uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+ { uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffffff, &x166);
+ { uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+ { uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+ { uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+ { uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+ { uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+ { uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+ { uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+ { uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+ { uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+ { uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+ { uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);
+ { uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+ { uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+ { uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+ { uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+ { uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+ { uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+ { uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+ { uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+ { uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+ { uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xeeeeeeef, &_);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff1, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+ { uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffffff, &x266);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+ { uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+ { uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+ { uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+ { uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xeeeeeeef, &_);
+ { uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff1, &x354);
+ { uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+ { uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+ { uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+ { uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffffff, &x366);
+ { uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+ { uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+ { uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+ { uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+ { uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+ { uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+ { uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+ { uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+ { uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+ { uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+ { uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+ { uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+ { uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+ { uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+ { uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xeeeeeeef, &_);
+ { uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff1, &x454);
+ { uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+ { uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+ { uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+ { uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffffff, &x466);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+ { uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+ { uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+ { uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+ { uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+ { uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+ { uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+ { uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff1, &x502);
+ { uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+ { uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+ { uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+ { uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffffff, &x514);
+ { uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+ { uint32_t x519 = cmovznz(x518, x514, x498);
+ { uint32_t x520 = cmovznz(x518, x511, x495);
+ { uint32_t x521 = cmovznz(x518, x508, x492);
+ { uint32_t x522 = cmovznz(x518, x505, x489);
+ { uint32_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e158m15/fenz.c b/src/Specific/montgomery32_2e158m15/fenz.c
index 755695e18..5601e8c15 100644
--- a/src/Specific/montgomery32_2e158m15/fenz.c
+++ b/src/Specific/montgomery32_2e158m15/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x9 = (x8 | x7);
-{ uint32_t x10 = (x6 | x9);
-{ uint32_t x11 = (x4 | x10);
-{ uint32_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x9 = (x8 | x7);
+ { uint32_t x10 = (x6 | x9);
+ { uint32_t x11 = (x4 | x10);
+ { uint32_t x12 = (x2 | x11);
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e158m15/feopp.c b/src/Specific/montgomery32_2e158m15/feopp.c
index ff0b2a1b3..a5f6375a9 100644
--- a/src/Specific/montgomery32_2e158m15/feopp.c
+++ b/src/Specific/montgomery32_2e158m15/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
-{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
-{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
-{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
-{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
-{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
-{ uint32_t x25 = (x24 & 0xfffffff1);
-{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
-{ uint32_t x29 = (x24 & 0xffffffff);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
-{ uint32_t x33 = (x24 & 0xffffffff);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
-{ uint32_t x37 = (x24 & 0xffffffff);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
-{ uint32_t x41 = (x24 & 0x3fffffff);
-{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);
+ { uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+ { uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+ { uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+ { uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
+ { uint32_t x25 = (x24 & 0xfffffff1);
+ { uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+ { uint32_t x29 = (x24 & 0xffffffff);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+ { uint32_t x33 = (x24 & 0xffffffff);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+ { uint32_t x37 = (x24 & 0xffffffff);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+ { uint32_t x41 = (x24 & 0x3fffffff);
+ { uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e158m15/fesub.c b/src/Specific/montgomery32_2e158m15/fesub.c
index 4234a2422..1ae7ece71 100644
--- a/src/Specific/montgomery32_2e158m15/fesub.c
+++ b/src/Specific/montgomery32_2e158m15/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
-{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
-{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
-{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
-{ uint32_t x36 = (x35 & 0xfffffff1);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
-{ uint32_t x40 = (x35 & 0xffffffff);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
-{ uint32_t x44 = (x35 & 0xffffffff);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
-{ uint32_t x48 = (x35 & 0xffffffff);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
-{ uint32_t x52 = (x35 & 0x3fffffff);
-{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+ { uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+ { uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+ { uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);
+ { uint32_t x36 = (x35 & 0xfffffff1);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+ { uint32_t x40 = (x35 & 0xffffffff);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+ { uint32_t x44 = (x35 & 0xffffffff);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+ { uint32_t x48 = (x35 & 0xffffffff);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+ { uint32_t x52 = (x35 & 0x3fffffff);
+ { uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);
+ out[0] = x38;
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e165m25/feadd.c b/src/Specific/montgomery32_2e165m25/feadd.c
index 395c57b9d..c7eaaa241 100644
--- a/src/Specific/montgomery32_2e165m25/feadd.c
+++ b/src/Specific/montgomery32_2e165m25/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffe7, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x1f, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffe7, &x43);
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x1f, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
+ { uint32_t x63 = cmovznz(x62, x58, x40);
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e165m25/femul.c b/src/Specific/montgomery32_2e165m25/femul.c
index 5d56cb96e..d4bc3fd9d 100644
--- a/src/Specific/montgomery32_2e165m25/femul.c
+++ b/src/Specific/montgomery32_2e165m25/femul.c
@@ -1,42 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xc28f5c29, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffe7, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-out[0] = uint32_t x79;
-out[1] = uint8_t x80 = Op Syntax.MulSplit 32 Syntax.TWord 5 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 3 x61;
-out[2] = 0x1f;;
-}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* femul: six rounds, one per limb of in1. Each round adds in1[i] * in2 to a running accumulator, then adds a multiple of the constant with limbs 0xffffffe7, 0xffffffff x4, 0x1f (= 2^165 - 25) chosen so the accumulator's low word becomes zero and can be dropped. This has the shape of word-by-word Montgomery multiplication (cf. the montgomery32_2e165m25 path in the diff header). NOTE(review): as written this does not compile, see the MulSplit lines. */
+ { const uint32_t x12 = in1[5]; /* in1 limbs; in1[0] is consumed first, in1[5] last */
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; /* in2 limbs */
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); /* round 0: partial products in1[0] * in2[j]; _mulx_u32 returns the low word and writes the high word through the pointer */
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); /* align the partial products: high word of product j is added to the low word of product j+1 */
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); /* top word: pending carry plus last high word; the carry-out is dropped into _ */
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xc28f5c29, &_); /* x61 = low word of x25 * 0xc28f5c29; since 0xc28f5c29 * 25 == 1 (mod 2^32), x61 * 0xffffffe7 == -x25 (mod 2^32) */
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffe7, &x65); /* x61 times the five low limbs of the constant */
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x79, uint8_t x80 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, 0x1f); /* NOTE(review): not valid C -- this looks like an unlowered generator term. Presumably the split product x61 * 0x1f (x79 = low word, x80 = high part) -- confirm and regenerate */
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82); /* align and sum the x61 * constant partial products */
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint8_t x96 = (x95 + x80); /* top word of x61 * constant */
+ { uint32_t _; uint8_t x99 = _addcarryx_u32(0x0, x25, x64, &_); /* add x61 * constant to the round-0 product; the low word x25 + x64 is 0 mod 2^32 and is discarded, so the sum shifts down one limb */
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x43, x82, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x46, x85, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x49, x88, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x52, x91, &x110);
+ { uint32_t x113; uint8_t x114 = _addcarryx_u32(x111, x55, x94, &x113);
+ { uint32_t x116; uint8_t x117 = _addcarryx_u32(x114, x58, x96, &x116);
+ { uint32_t x120; uint32_t x119 = _mulx_u32(x7, x15, &x120); /* round 1: partial products in1[1] * in2[j] */
+ { uint32_t x123; uint32_t x122 = _mulx_u32(x7, x17, &x123);
+ { uint32_t x126; uint32_t x125 = _mulx_u32(x7, x19, &x126);
+ { uint32_t x129; uint32_t x128 = _mulx_u32(x7, x21, &x129);
+ { uint32_t x132; uint32_t x131 = _mulx_u32(x7, x23, &x132);
+ { uint32_t x135; uint32_t x134 = _mulx_u32(x7, x22, &x135);
+ { uint32_t x137; uint8_t x138 = _addcarryx_u32(0x0, x120, x122, &x137);
+ { uint32_t x140; uint8_t x141 = _addcarryx_u32(x138, x123, x125, &x140);
+ { uint32_t x143; uint8_t x144 = _addcarryx_u32(x141, x126, x128, &x143);
+ { uint32_t x146; uint8_t x147 = _addcarryx_u32(x144, x129, x131, &x146);
+ { uint32_t x149; uint8_t x150 = _addcarryx_u32(x147, x132, x134, &x149);
+ { uint32_t x152; uint8_t _ = _addcarryx_u32(0x0, x150, x135, &x152);
+ { uint32_t x155; uint8_t x156 = _addcarryx_u32(0x0, x101, x119, &x155); /* add the round-1 product to the accumulator left by round 0 */
+ { uint32_t x158; uint8_t x159 = _addcarryx_u32(x156, x104, x137, &x158);
+ { uint32_t x161; uint8_t x162 = _addcarryx_u32(x159, x107, x140, &x161);
+ { uint32_t x164; uint8_t x165 = _addcarryx_u32(x162, x110, x143, &x164);
+ { uint32_t x167; uint8_t x168 = _addcarryx_u32(x165, x113, x146, &x167);
+ { uint32_t x170; uint8_t x171 = _addcarryx_u32(x168, x116, x149, &x170);
+ { uint32_t x173; uint8_t x174 = _addcarryx_u32(x171, x117, x152, &x173);
+ { uint32_t _; uint32_t x176 = _mulx_u32(x155, 0xc28f5c29, &_); /* same reduction step as round 0, keyed on the new low word x155 */
+ { uint32_t x180; uint32_t x179 = _mulx_u32(x176, 0xffffffe7, &x180);
+ { uint32_t x183; uint32_t x182 = _mulx_u32(x176, 0xffffffff, &x183);
+ { uint32_t x186; uint32_t x185 = _mulx_u32(x176, 0xffffffff, &x186);
+ { uint32_t x189; uint32_t x188 = _mulx_u32(x176, 0xffffffff, &x189);
+ { uint32_t x192; uint32_t x191 = _mulx_u32(x176, 0xffffffff, &x192);
+ { uint32_t x194, uint8_t x195 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x176, 0x1f); /* NOTE(review): not valid C; unlowered generator term, presumably the split product x176 * 0x1f -- confirm */
+ { uint32_t x197; uint8_t x198 = _addcarryx_u32(0x0, x180, x182, &x197);
+ { uint32_t x200; uint8_t x201 = _addcarryx_u32(x198, x183, x185, &x200);
+ { uint32_t x203; uint8_t x204 = _addcarryx_u32(x201, x186, x188, &x203);
+ { uint32_t x206; uint8_t x207 = _addcarryx_u32(x204, x189, x191, &x206);
+ { uint32_t x209; uint8_t x210 = _addcarryx_u32(x207, x192, x194, &x209);
+ { uint8_t x211 = (x210 + x195);
+ { uint32_t _; uint8_t x214 = _addcarryx_u32(0x0, x155, x179, &_); /* low word cancels and is discarded */
+ { uint32_t x216; uint8_t x217 = _addcarryx_u32(x214, x158, x197, &x216);
+ { uint32_t x219; uint8_t x220 = _addcarryx_u32(x217, x161, x200, &x219);
+ { uint32_t x222; uint8_t x223 = _addcarryx_u32(x220, x164, x203, &x222);
+ { uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x167, x206, &x225);
+ { uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x170, x209, &x228);
+ { uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x173, x211, &x231);
+ { uint8_t x233 = (x232 + x174); /* combine the carry-outs of the product addition (x174) and the reduction addition (x232) into the top accumulator word */
+ { uint32_t x236; uint32_t x235 = _mulx_u32(x9, x15, &x236); /* round 2: partial products in1[2] * in2[j] */
+ { uint32_t x239; uint32_t x238 = _mulx_u32(x9, x17, &x239);
+ { uint32_t x242; uint32_t x241 = _mulx_u32(x9, x19, &x242);
+ { uint32_t x245; uint32_t x244 = _mulx_u32(x9, x21, &x245);
+ { uint32_t x248; uint32_t x247 = _mulx_u32(x9, x23, &x248);
+ { uint32_t x251; uint32_t x250 = _mulx_u32(x9, x22, &x251);
+ { uint32_t x253; uint8_t x254 = _addcarryx_u32(0x0, x236, x238, &x253);
+ { uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x239, x241, &x256);
+ { uint32_t x259; uint8_t x260 = _addcarryx_u32(x257, x242, x244, &x259);
+ { uint32_t x262; uint8_t x263 = _addcarryx_u32(x260, x245, x247, &x262);
+ { uint32_t x265; uint8_t x266 = _addcarryx_u32(x263, x248, x250, &x265);
+ { uint32_t x268; uint8_t _ = _addcarryx_u32(0x0, x266, x251, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(0x0, x216, x235, &x271); /* add the round-2 product to the accumulator */
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x219, x253, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x222, x256, &x277);
+ { uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x225, x259, &x280);
+ { uint32_t x283; uint8_t x284 = _addcarryx_u32(x281, x228, x262, &x283);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x231, x265, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x233, x268, &x289);
+ { uint32_t _; uint32_t x292 = _mulx_u32(x271, 0xc28f5c29, &_); /* reduction step keyed on the low word x271 */
+ { uint32_t x296; uint32_t x295 = _mulx_u32(x292, 0xffffffe7, &x296);
+ { uint32_t x299; uint32_t x298 = _mulx_u32(x292, 0xffffffff, &x299);
+ { uint32_t x302; uint32_t x301 = _mulx_u32(x292, 0xffffffff, &x302);
+ { uint32_t x305; uint32_t x304 = _mulx_u32(x292, 0xffffffff, &x305);
+ { uint32_t x308; uint32_t x307 = _mulx_u32(x292, 0xffffffff, &x308);
+ { uint32_t x310, uint8_t x311 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x292, 0x1f); /* NOTE(review): not valid C; unlowered generator term, presumably the split product x292 * 0x1f -- confirm */
+ { uint32_t x313; uint8_t x314 = _addcarryx_u32(0x0, x296, x298, &x313);
+ { uint32_t x316; uint8_t x317 = _addcarryx_u32(x314, x299, x301, &x316);
+ { uint32_t x319; uint8_t x320 = _addcarryx_u32(x317, x302, x304, &x319);
+ { uint32_t x322; uint8_t x323 = _addcarryx_u32(x320, x305, x307, &x322);
+ { uint32_t x325; uint8_t x326 = _addcarryx_u32(x323, x308, x310, &x325);
+ { uint8_t x327 = (x326 + x311);
+ { uint32_t _; uint8_t x330 = _addcarryx_u32(0x0, x271, x295, &_); /* low word cancels and is discarded */
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(x330, x274, x313, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x277, x316, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x280, x319, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x283, x322, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x286, x325, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x289, x327, &x347);
+ { uint8_t x349 = (x348 + x290); /* combine the two carry-outs into the top accumulator word */
+ { uint32_t x352; uint32_t x351 = _mulx_u32(x11, x15, &x352); /* round 3: partial products in1[3] * in2[j] */
+ { uint32_t x355; uint32_t x354 = _mulx_u32(x11, x17, &x355);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x19, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x21, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x23, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x22, &x367);
+ { uint32_t x369; uint8_t x370 = _addcarryx_u32(0x0, x352, x354, &x369);
+ { uint32_t x372; uint8_t x373 = _addcarryx_u32(x370, x355, x357, &x372);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(x373, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t _ = _addcarryx_u32(0x0, x382, x367, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(0x0, x332, x351, &x387); /* add the round-3 product to the accumulator */
+ { uint32_t x390; uint8_t x391 = _addcarryx_u32(x388, x335, x369, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(x391, x338, x372, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x349, x384, &x405);
+ { uint32_t _; uint32_t x408 = _mulx_u32(x387, 0xc28f5c29, &_); /* reduction step keyed on the low word x387 */
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x408, 0xffffffe7, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x408, 0xffffffff, &x415);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x408, 0xffffffff, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x408, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x408, 0xffffffff, &x424);
+ { uint32_t x426, uint8_t x427 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x408, 0x1f); /* NOTE(review): not valid C; unlowered generator term, presumably the split product x408 * 0x1f -- confirm */
+ { uint32_t x429; uint8_t x430 = _addcarryx_u32(0x0, x412, x414, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(x430, x415, x417, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint8_t x443 = (x442 + x427);
+ { uint32_t _; uint8_t x446 = _addcarryx_u32(0x0, x387, x411, &_); /* low word cancels and is discarded */
+ { uint32_t x448; uint8_t x449 = _addcarryx_u32(x446, x390, x429, &x448);
+ { uint32_t x451; uint8_t x452 = _addcarryx_u32(x449, x393, x432, &x451);
+ { uint32_t x454; uint8_t x455 = _addcarryx_u32(x452, x396, x435, &x454);
+ { uint32_t x457; uint8_t x458 = _addcarryx_u32(x455, x399, x438, &x457);
+ { uint32_t x460; uint8_t x461 = _addcarryx_u32(x458, x402, x441, &x460);
+ { uint32_t x463; uint8_t x464 = _addcarryx_u32(x461, x405, x443, &x463);
+ { uint8_t x465 = (x464 + x406); /* combine the two carry-outs into the top accumulator word */
+ { uint32_t x468; uint32_t x467 = _mulx_u32(x13, x15, &x468); /* round 4: partial products in1[4] * in2[j] */
+ { uint32_t x471; uint32_t x470 = _mulx_u32(x13, x17, &x471);
+ { uint32_t x474; uint32_t x473 = _mulx_u32(x13, x19, &x474);
+ { uint32_t x477; uint32_t x476 = _mulx_u32(x13, x21, &x477);
+ { uint32_t x480; uint32_t x479 = _mulx_u32(x13, x23, &x480);
+ { uint32_t x483; uint32_t x482 = _mulx_u32(x13, x22, &x483);
+ { uint32_t x485; uint8_t x486 = _addcarryx_u32(0x0, x468, x470, &x485);
+ { uint32_t x488; uint8_t x489 = _addcarryx_u32(x486, x471, x473, &x488);
+ { uint32_t x491; uint8_t x492 = _addcarryx_u32(x489, x474, x476, &x491);
+ { uint32_t x494; uint8_t x495 = _addcarryx_u32(x492, x477, x479, &x494);
+ { uint32_t x497; uint8_t x498 = _addcarryx_u32(x495, x480, x482, &x497);
+ { uint32_t x500; uint8_t _ = _addcarryx_u32(0x0, x498, x483, &x500);
+ { uint32_t x503; uint8_t x504 = _addcarryx_u32(0x0, x448, x467, &x503); /* add the round-4 product to the accumulator */
+ { uint32_t x506; uint8_t x507 = _addcarryx_u32(x504, x451, x485, &x506);
+ { uint32_t x509; uint8_t x510 = _addcarryx_u32(x507, x454, x488, &x509);
+ { uint32_t x512; uint8_t x513 = _addcarryx_u32(x510, x457, x491, &x512);
+ { uint32_t x515; uint8_t x516 = _addcarryx_u32(x513, x460, x494, &x515);
+ { uint32_t x518; uint8_t x519 = _addcarryx_u32(x516, x463, x497, &x518);
+ { uint32_t x521; uint8_t x522 = _addcarryx_u32(x519, x465, x500, &x521);
+ { uint32_t _; uint32_t x524 = _mulx_u32(x503, 0xc28f5c29, &_); /* reduction step keyed on the low word x503 */
+ { uint32_t x528; uint32_t x527 = _mulx_u32(x524, 0xffffffe7, &x528);
+ { uint32_t x531; uint32_t x530 = _mulx_u32(x524, 0xffffffff, &x531);
+ { uint32_t x534; uint32_t x533 = _mulx_u32(x524, 0xffffffff, &x534);
+ { uint32_t x537; uint32_t x536 = _mulx_u32(x524, 0xffffffff, &x537);
+ { uint32_t x540; uint32_t x539 = _mulx_u32(x524, 0xffffffff, &x540);
+ { uint32_t x542, uint8_t x543 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x524, 0x1f); /* NOTE(review): not valid C; unlowered generator term, presumably the split product x524 * 0x1f -- confirm */
+ { uint32_t x545; uint8_t x546 = _addcarryx_u32(0x0, x528, x530, &x545);
+ { uint32_t x548; uint8_t x549 = _addcarryx_u32(x546, x531, x533, &x548);
+ { uint32_t x551; uint8_t x552 = _addcarryx_u32(x549, x534, x536, &x551);
+ { uint32_t x554; uint8_t x555 = _addcarryx_u32(x552, x537, x539, &x554);
+ { uint32_t x557; uint8_t x558 = _addcarryx_u32(x555, x540, x542, &x557);
+ { uint8_t x559 = (x558 + x543);
+ { uint32_t _; uint8_t x562 = _addcarryx_u32(0x0, x503, x527, &_); /* low word cancels and is discarded */
+ { uint32_t x564; uint8_t x565 = _addcarryx_u32(x562, x506, x545, &x564);
+ { uint32_t x567; uint8_t x568 = _addcarryx_u32(x565, x509, x548, &x567);
+ { uint32_t x570; uint8_t x571 = _addcarryx_u32(x568, x512, x551, &x570);
+ { uint32_t x573; uint8_t x574 = _addcarryx_u32(x571, x515, x554, &x573);
+ { uint32_t x576; uint8_t x577 = _addcarryx_u32(x574, x518, x557, &x576);
+ { uint32_t x579; uint8_t x580 = _addcarryx_u32(x577, x521, x559, &x579);
+ { uint8_t x581 = (x580 + x522); /* combine the two carry-outs into the top accumulator word */
+ { uint32_t x584; uint32_t x583 = _mulx_u32(x12, x15, &x584); /* round 5: partial products in1[5] * in2[j] */
+ { uint32_t x587; uint32_t x586 = _mulx_u32(x12, x17, &x587);
+ { uint32_t x590; uint32_t x589 = _mulx_u32(x12, x19, &x590);
+ { uint32_t x593; uint32_t x592 = _mulx_u32(x12, x21, &x593);
+ { uint32_t x596; uint32_t x595 = _mulx_u32(x12, x23, &x596);
+ { uint32_t x599; uint32_t x598 = _mulx_u32(x12, x22, &x599);
+ { uint32_t x601; uint8_t x602 = _addcarryx_u32(0x0, x584, x586, &x601);
+ { uint32_t x604; uint8_t x605 = _addcarryx_u32(x602, x587, x589, &x604);
+ { uint32_t x607; uint8_t x608 = _addcarryx_u32(x605, x590, x592, &x607);
+ { uint32_t x610; uint8_t x611 = _addcarryx_u32(x608, x593, x595, &x610);
+ { uint32_t x613; uint8_t x614 = _addcarryx_u32(x611, x596, x598, &x613);
+ { uint32_t x616; uint8_t _ = _addcarryx_u32(0x0, x614, x599, &x616);
+ { uint32_t x619; uint8_t x620 = _addcarryx_u32(0x0, x564, x583, &x619); /* add the round-5 product to the accumulator */
+ { uint32_t x622; uint8_t x623 = _addcarryx_u32(x620, x567, x601, &x622);
+ { uint32_t x625; uint8_t x626 = _addcarryx_u32(x623, x570, x604, &x625);
+ { uint32_t x628; uint8_t x629 = _addcarryx_u32(x626, x573, x607, &x628);
+ { uint32_t x631; uint8_t x632 = _addcarryx_u32(x629, x576, x610, &x631);
+ { uint32_t x634; uint8_t x635 = _addcarryx_u32(x632, x579, x613, &x634);
+ { uint32_t x637; uint8_t x638 = _addcarryx_u32(x635, x581, x616, &x637);
+ { uint32_t _; uint32_t x640 = _mulx_u32(x619, 0xc28f5c29, &_); /* reduction step keyed on the low word x619 */
+ { uint32_t x644; uint32_t x643 = _mulx_u32(x640, 0xffffffe7, &x644);
+ { uint32_t x647; uint32_t x646 = _mulx_u32(x640, 0xffffffff, &x647);
+ { uint32_t x650; uint32_t x649 = _mulx_u32(x640, 0xffffffff, &x650);
+ { uint32_t x653; uint32_t x652 = _mulx_u32(x640, 0xffffffff, &x653);
+ { uint32_t x656; uint32_t x655 = _mulx_u32(x640, 0xffffffff, &x656);
+ { uint32_t x658, uint8_t x659 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x640, 0x1f); /* NOTE(review): not valid C; unlowered generator term, presumably the split product x640 * 0x1f -- confirm */
+ { uint32_t x661; uint8_t x662 = _addcarryx_u32(0x0, x644, x646, &x661);
+ { uint32_t x664; uint8_t x665 = _addcarryx_u32(x662, x647, x649, &x664);
+ { uint32_t x667; uint8_t x668 = _addcarryx_u32(x665, x650, x652, &x667);
+ { uint32_t x670; uint8_t x671 = _addcarryx_u32(x668, x653, x655, &x670);
+ { uint32_t x673; uint8_t x674 = _addcarryx_u32(x671, x656, x658, &x673);
+ { uint8_t x675 = (x674 + x659);
+ { uint32_t _; uint8_t x678 = _addcarryx_u32(0x0, x619, x643, &_); /* low word cancels and is discarded */
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x622, x661, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x625, x664, &x683);
+ { uint32_t x686; uint8_t x687 = _addcarryx_u32(x684, x628, x667, &x686);
+ { uint32_t x689; uint8_t x690 = _addcarryx_u32(x687, x631, x670, &x689);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x634, x673, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x637, x675, &x695);
+ { uint8_t x697 = (x696 + x638); /* overflow word above the six result limbs */
+ { uint32_t x699; uint8_t x700 = _subborrow_u32(0x0, x680, 0xffffffe7, &x699); /* trial subtraction of the constant 2^165 - 25 from the six result limbs plus overflow word x697 */
+ { uint32_t x702; uint8_t x703 = _subborrow_u32(x700, x683, 0xffffffff, &x702);
+ { uint32_t x705; uint8_t x706 = _subborrow_u32(x703, x686, 0xffffffff, &x705);
+ { uint32_t x708; uint8_t x709 = _subborrow_u32(x706, x689, 0xffffffff, &x708);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(x709, x692, 0xffffffff, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0x1f, &x714);
+ { uint32_t _; uint8_t x718 = _subborrow_u32(x715, x697, 0x0, &_); /* x718 = final borrow; the difference word is discarded */
+ { uint32_t x719 = cmovznz(x718, x714, x695); /* cmovznz is defined outside this hunk; presumably keeps the difference (2nd argument) when x718 == 0 and the unsubtracted limb (3rd) otherwise -- TODO confirm */
+ { uint32_t x720 = cmovznz(x718, x711, x692);
+ { uint32_t x721 = cmovznz(x718, x708, x689);
+ { uint32_t x722 = cmovznz(x718, x705, x686);
+ { uint32_t x723 = cmovznz(x718, x702, x683);
+ { uint32_t x724 = cmovznz(x718, x699, x680);
+ out[0] = x724; /* store the selected limbs, least-significant first */
+ out[1] = x723;
+ out[2] = x722;
+ out[3] = x721;
+ out[4] = x720;
+ out[5] = x719;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e165m25/fenz.c b/src/Specific/montgomery32_2e165m25/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e165m25/fenz.c
+++ b/src/Specific/montgomery32_2e165m25/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) { /* fenz: writes the bitwise OR of the six limbs of in1 to out[0], so out[0] is nonzero exactly when some limb is nonzero. NOTE(review): `ReturnType` is not declared anywhere in this hunk and looks like a generator placeholder that would not compile -- confirm. */
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9); /* fold the limbs together from the top (in1[5]) down to in1[0] */
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15; /* OR of all six limbs */
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e165m25/feopp.c b/src/Specific/montgomery32_2e165m25/feopp.c
index 22ff19e12..c9aeeded1 100644
--- a/src/Specific/montgomery32_2e165m25/feopp.c
+++ b/src/Specific/montgomery32_2e165m25/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xffffffe7);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint8_t x50 = ((uint8_t)x29 & 0x1f);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) { /* feopp: computes 0 - in1 over six 32-bit limbs, then adds back the constant 2^165 - 25 (limbs 0xffffffe7, 0xffffffff x4, 0x1f) under a mask derived from the final borrow. */
+ { const uint32_t x9 = in1[5]; /* in1 limbs; the borrow chain below starts at index 0, so in1[5] is the top limb */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); /* 0 - in1, limb by limb; each returned borrow feeds the next call */
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); /* x28 = final borrow */
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); /* cmovznz is defined outside this hunk; presumably yields 0 when x28 == 0 and 0xffffffff otherwise, i.e. an all-zeros/all-ones mask -- TODO confirm */
+ { uint32_t x30 = (x29 & 0xffffffe7); /* mask applied to the low limb of the constant */
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); /* add the masked constant back, limb by limb with carry */
+ { uint32_t x34 = (x29 & 0xffffffff); /* masking with 0xffffffff leaves x29 unchanged */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint8_t x50 = ((uint8_t)x29 & 0x1f); /* mask applied to the top limb 0x1f */
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); /* the final carry-out is discarded */
+ out[0] = x32; /* store result, least-significant limb first */
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e165m25/fesub.c b/src/Specific/montgomery32_2e165m25/fesub.c
index 2c2f9dadc..1df4a8069 100644
--- a/src/Specific/montgomery32_2e165m25/fesub.c
+++ b/src/Specific/montgomery32_2e165m25/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xffffffe7);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint8_t x63 = ((uint8_t)x42 & 0x1f);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* fesub: computes in1 - in2 over six 32-bit limbs, then adds back the constant 2^165 - 25 (limbs 0xffffffe7, 0xffffffff x4, 0x1f) under a mask derived from the final borrow. */
+ { const uint32_t x12 = in1[5]; /* in1 limbs; the borrow chain below starts at index 0, so in1[5] is the top limb */
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; /* in2 limbs, same layout */
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); /* in1 - in2, limb by limb; each returned borrow feeds the next call */
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); /* x41 = final borrow */
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); /* cmovznz is defined outside this hunk; presumably yields 0 when x41 == 0 and 0xffffffff otherwise, i.e. an all-zeros/all-ones mask -- TODO confirm */
+ { uint32_t x43 = (x42 & 0xffffffe7); /* mask applied to the low limb of the constant */
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); /* add the masked constant back, limb by limb with carry */
+ { uint32_t x47 = (x42 & 0xffffffff); /* masking with 0xffffffff leaves x42 unchanged */
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint8_t x63 = ((uint8_t)x42 & 0x1f); /* mask applied to the top limb 0x1f */
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); /* the final carry-out is discarded */
+ out[0] = x45; /* store result, least-significant limb first */
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e166m5/feadd.c b/src/Specific/montgomery32_2e166m5/feadd.c
index 4d85b9fc4..2781fe1c1 100644
--- a/src/Specific/montgomery32_2e166m5/feadd.c
+++ b/src/Specific/montgomery32_2e166m5/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffffb, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3f, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* feadd: out = in1 + in2 over six 32-bit limbs, then one conditional subtraction of the constant whose limbs appear below. */
+ { const uint32_t x12 = in1[5]; /* in1 limbs; the carry chain below starts at index 0, so in1[5] is the top limb */
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; /* in2 limbs, same layout */
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); /* limb-wise add; each call's returned carry (x26, x29, ...) is the carry-in of the next call */
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); /* x41 = carry out of the top limb */
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffffb, &x43); /* trial subtraction of the limbs 0xfffffffb, 0xffffffff x4, 0x3f, which spell 2^166 - 5 */
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3f, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); /* run the borrow through the extra carry x41; only the final borrow x62 is kept, the difference word is discarded */
+ { uint32_t x63 = cmovznz(x62, x58, x40); /* cmovznz is defined outside this hunk; presumably returns the 2nd argument (difference) when x62 == 0 and the 3rd (raw sum) otherwise -- TODO confirm */
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68; /* store the selected limbs, least-significant first */
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e166m5/femul.c b/src/Specific/montgomery32_2e166m5/femul.c
index 794cb8b04..0a48b7209 100644
--- a/src/Specific/montgomery32_2e166m5/femul.c
+++ b/src/Specific/montgomery32_2e166m5/femul.c
@@ -1,42 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xcccccccd, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffffb, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-out[0] = uint32_t x79;
-out[1] = uint8_t x80 = Op Syntax.MulSplit 32 Syntax.TWord 5 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 3 x61;
-out[2] = 0x3f;;
-}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xcccccccd, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffffb, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x79, uint8_t x80 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, 0x3f);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint8_t x96 = (x95 + x80);
+ { uint32_t _; uint8_t x99 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x43, x82, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x46, x85, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x49, x88, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x52, x91, &x110);
+ { uint32_t x113; uint8_t x114 = _addcarryx_u32(x111, x55, x94, &x113);
+ { uint32_t x116; uint8_t x117 = _addcarryx_u32(x114, x58, x96, &x116);
+ { uint32_t x120; uint32_t x119 = _mulx_u32(x7, x15, &x120);
+ { uint32_t x123; uint32_t x122 = _mulx_u32(x7, x17, &x123);
+ { uint32_t x126; uint32_t x125 = _mulx_u32(x7, x19, &x126);
+ { uint32_t x129; uint32_t x128 = _mulx_u32(x7, x21, &x129);
+ { uint32_t x132; uint32_t x131 = _mulx_u32(x7, x23, &x132);
+ { uint32_t x135; uint32_t x134 = _mulx_u32(x7, x22, &x135);
+ { uint32_t x137; uint8_t x138 = _addcarryx_u32(0x0, x120, x122, &x137);
+ { uint32_t x140; uint8_t x141 = _addcarryx_u32(x138, x123, x125, &x140);
+ { uint32_t x143; uint8_t x144 = _addcarryx_u32(x141, x126, x128, &x143);
+ { uint32_t x146; uint8_t x147 = _addcarryx_u32(x144, x129, x131, &x146);
+ { uint32_t x149; uint8_t x150 = _addcarryx_u32(x147, x132, x134, &x149);
+ { uint32_t x152; uint8_t _ = _addcarryx_u32(0x0, x150, x135, &x152);
+ { uint32_t x155; uint8_t x156 = _addcarryx_u32(0x0, x101, x119, &x155);
+ { uint32_t x158; uint8_t x159 = _addcarryx_u32(x156, x104, x137, &x158);
+ { uint32_t x161; uint8_t x162 = _addcarryx_u32(x159, x107, x140, &x161);
+ { uint32_t x164; uint8_t x165 = _addcarryx_u32(x162, x110, x143, &x164);
+ { uint32_t x167; uint8_t x168 = _addcarryx_u32(x165, x113, x146, &x167);
+ { uint32_t x170; uint8_t x171 = _addcarryx_u32(x168, x116, x149, &x170);
+ { uint32_t x173; uint8_t x174 = _addcarryx_u32(x171, x117, x152, &x173);
+ { uint32_t _; uint32_t x176 = _mulx_u32(x155, 0xcccccccd, &_);
+ { uint32_t x180; uint32_t x179 = _mulx_u32(x176, 0xfffffffb, &x180);
+ { uint32_t x183; uint32_t x182 = _mulx_u32(x176, 0xffffffff, &x183);
+ { uint32_t x186; uint32_t x185 = _mulx_u32(x176, 0xffffffff, &x186);
+ { uint32_t x189; uint32_t x188 = _mulx_u32(x176, 0xffffffff, &x189);
+ { uint32_t x192; uint32_t x191 = _mulx_u32(x176, 0xffffffff, &x192);
+ { uint32_t x194, uint8_t x195 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x176, 0x3f);
+ { uint32_t x197; uint8_t x198 = _addcarryx_u32(0x0, x180, x182, &x197);
+ { uint32_t x200; uint8_t x201 = _addcarryx_u32(x198, x183, x185, &x200);
+ { uint32_t x203; uint8_t x204 = _addcarryx_u32(x201, x186, x188, &x203);
+ { uint32_t x206; uint8_t x207 = _addcarryx_u32(x204, x189, x191, &x206);
+ { uint32_t x209; uint8_t x210 = _addcarryx_u32(x207, x192, x194, &x209);
+ { uint8_t x211 = (x210 + x195);
+ { uint32_t _; uint8_t x214 = _addcarryx_u32(0x0, x155, x179, &_);
+ { uint32_t x216; uint8_t x217 = _addcarryx_u32(x214, x158, x197, &x216);
+ { uint32_t x219; uint8_t x220 = _addcarryx_u32(x217, x161, x200, &x219);
+ { uint32_t x222; uint8_t x223 = _addcarryx_u32(x220, x164, x203, &x222);
+ { uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x167, x206, &x225);
+ { uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x170, x209, &x228);
+ { uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x173, x211, &x231);
+ { uint8_t x233 = (x232 + x174);
+ { uint32_t x236; uint32_t x235 = _mulx_u32(x9, x15, &x236);
+ { uint32_t x239; uint32_t x238 = _mulx_u32(x9, x17, &x239);
+ { uint32_t x242; uint32_t x241 = _mulx_u32(x9, x19, &x242);
+ { uint32_t x245; uint32_t x244 = _mulx_u32(x9, x21, &x245);
+ { uint32_t x248; uint32_t x247 = _mulx_u32(x9, x23, &x248);
+ { uint32_t x251; uint32_t x250 = _mulx_u32(x9, x22, &x251);
+ { uint32_t x253; uint8_t x254 = _addcarryx_u32(0x0, x236, x238, &x253);
+ { uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x239, x241, &x256);
+ { uint32_t x259; uint8_t x260 = _addcarryx_u32(x257, x242, x244, &x259);
+ { uint32_t x262; uint8_t x263 = _addcarryx_u32(x260, x245, x247, &x262);
+ { uint32_t x265; uint8_t x266 = _addcarryx_u32(x263, x248, x250, &x265);
+ { uint32_t x268; uint8_t _ = _addcarryx_u32(0x0, x266, x251, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(0x0, x216, x235, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x219, x253, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x222, x256, &x277);
+ { uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x225, x259, &x280);
+ { uint32_t x283; uint8_t x284 = _addcarryx_u32(x281, x228, x262, &x283);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x231, x265, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x233, x268, &x289);
+ { uint32_t _; uint32_t x292 = _mulx_u32(x271, 0xcccccccd, &_);
+ { uint32_t x296; uint32_t x295 = _mulx_u32(x292, 0xfffffffb, &x296);
+ { uint32_t x299; uint32_t x298 = _mulx_u32(x292, 0xffffffff, &x299);
+ { uint32_t x302; uint32_t x301 = _mulx_u32(x292, 0xffffffff, &x302);
+ { uint32_t x305; uint32_t x304 = _mulx_u32(x292, 0xffffffff, &x305);
+ { uint32_t x308; uint32_t x307 = _mulx_u32(x292, 0xffffffff, &x308);
+ { uint32_t x310, uint8_t x311 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x292, 0x3f);
+ { uint32_t x313; uint8_t x314 = _addcarryx_u32(0x0, x296, x298, &x313);
+ { uint32_t x316; uint8_t x317 = _addcarryx_u32(x314, x299, x301, &x316);
+ { uint32_t x319; uint8_t x320 = _addcarryx_u32(x317, x302, x304, &x319);
+ { uint32_t x322; uint8_t x323 = _addcarryx_u32(x320, x305, x307, &x322);
+ { uint32_t x325; uint8_t x326 = _addcarryx_u32(x323, x308, x310, &x325);
+ { uint8_t x327 = (x326 + x311);
+ { uint32_t _; uint8_t x330 = _addcarryx_u32(0x0, x271, x295, &_);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(x330, x274, x313, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x277, x316, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x280, x319, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x283, x322, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x286, x325, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x289, x327, &x347);
+ { uint8_t x349 = (x348 + x290);
+ { uint32_t x352; uint32_t x351 = _mulx_u32(x11, x15, &x352);
+ { uint32_t x355; uint32_t x354 = _mulx_u32(x11, x17, &x355);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x19, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x21, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x23, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x22, &x367);
+ { uint32_t x369; uint8_t x370 = _addcarryx_u32(0x0, x352, x354, &x369);
+ { uint32_t x372; uint8_t x373 = _addcarryx_u32(x370, x355, x357, &x372);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(x373, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t _ = _addcarryx_u32(0x0, x382, x367, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(0x0, x332, x351, &x387);
+ { uint32_t x390; uint8_t x391 = _addcarryx_u32(x388, x335, x369, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(x391, x338, x372, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x349, x384, &x405);
+ { uint32_t _; uint32_t x408 = _mulx_u32(x387, 0xcccccccd, &_);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x408, 0xfffffffb, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x408, 0xffffffff, &x415);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x408, 0xffffffff, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x408, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x408, 0xffffffff, &x424);
+ { uint32_t x426, uint8_t x427 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x408, 0x3f);
+ { uint32_t x429; uint8_t x430 = _addcarryx_u32(0x0, x412, x414, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(x430, x415, x417, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint8_t x443 = (x442 + x427);
+ { uint32_t _; uint8_t x446 = _addcarryx_u32(0x0, x387, x411, &_);
+ { uint32_t x448; uint8_t x449 = _addcarryx_u32(x446, x390, x429, &x448);
+ { uint32_t x451; uint8_t x452 = _addcarryx_u32(x449, x393, x432, &x451);
+ { uint32_t x454; uint8_t x455 = _addcarryx_u32(x452, x396, x435, &x454);
+ { uint32_t x457; uint8_t x458 = _addcarryx_u32(x455, x399, x438, &x457);
+ { uint32_t x460; uint8_t x461 = _addcarryx_u32(x458, x402, x441, &x460);
+ { uint32_t x463; uint8_t x464 = _addcarryx_u32(x461, x405, x443, &x463);
+ { uint8_t x465 = (x464 + x406);
+ { uint32_t x468; uint32_t x467 = _mulx_u32(x13, x15, &x468);
+ { uint32_t x471; uint32_t x470 = _mulx_u32(x13, x17, &x471);
+ { uint32_t x474; uint32_t x473 = _mulx_u32(x13, x19, &x474);
+ { uint32_t x477; uint32_t x476 = _mulx_u32(x13, x21, &x477);
+ { uint32_t x480; uint32_t x479 = _mulx_u32(x13, x23, &x480);
+ { uint32_t x483; uint32_t x482 = _mulx_u32(x13, x22, &x483);
+ { uint32_t x485; uint8_t x486 = _addcarryx_u32(0x0, x468, x470, &x485);
+ { uint32_t x488; uint8_t x489 = _addcarryx_u32(x486, x471, x473, &x488);
+ { uint32_t x491; uint8_t x492 = _addcarryx_u32(x489, x474, x476, &x491);
+ { uint32_t x494; uint8_t x495 = _addcarryx_u32(x492, x477, x479, &x494);
+ { uint32_t x497; uint8_t x498 = _addcarryx_u32(x495, x480, x482, &x497);
+ { uint32_t x500; uint8_t _ = _addcarryx_u32(0x0, x498, x483, &x500);
+ { uint32_t x503; uint8_t x504 = _addcarryx_u32(0x0, x448, x467, &x503);
+ { uint32_t x506; uint8_t x507 = _addcarryx_u32(x504, x451, x485, &x506);
+ { uint32_t x509; uint8_t x510 = _addcarryx_u32(x507, x454, x488, &x509);
+ { uint32_t x512; uint8_t x513 = _addcarryx_u32(x510, x457, x491, &x512);
+ { uint32_t x515; uint8_t x516 = _addcarryx_u32(x513, x460, x494, &x515);
+ { uint32_t x518; uint8_t x519 = _addcarryx_u32(x516, x463, x497, &x518);
+ { uint32_t x521; uint8_t x522 = _addcarryx_u32(x519, x465, x500, &x521);
+ { uint32_t _; uint32_t x524 = _mulx_u32(x503, 0xcccccccd, &_);
+ { uint32_t x528; uint32_t x527 = _mulx_u32(x524, 0xfffffffb, &x528);
+ { uint32_t x531; uint32_t x530 = _mulx_u32(x524, 0xffffffff, &x531);
+ { uint32_t x534; uint32_t x533 = _mulx_u32(x524, 0xffffffff, &x534);
+ { uint32_t x537; uint32_t x536 = _mulx_u32(x524, 0xffffffff, &x537);
+ { uint32_t x540; uint32_t x539 = _mulx_u32(x524, 0xffffffff, &x540);
+ { uint32_t x542, uint8_t x543 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x524, 0x3f);
+ { uint32_t x545; uint8_t x546 = _addcarryx_u32(0x0, x528, x530, &x545);
+ { uint32_t x548; uint8_t x549 = _addcarryx_u32(x546, x531, x533, &x548);
+ { uint32_t x551; uint8_t x552 = _addcarryx_u32(x549, x534, x536, &x551);
+ { uint32_t x554; uint8_t x555 = _addcarryx_u32(x552, x537, x539, &x554);
+ { uint32_t x557; uint8_t x558 = _addcarryx_u32(x555, x540, x542, &x557);
+ { uint8_t x559 = (x558 + x543);
+ { uint32_t _; uint8_t x562 = _addcarryx_u32(0x0, x503, x527, &_);
+ { uint32_t x564; uint8_t x565 = _addcarryx_u32(x562, x506, x545, &x564);
+ { uint32_t x567; uint8_t x568 = _addcarryx_u32(x565, x509, x548, &x567);
+ { uint32_t x570; uint8_t x571 = _addcarryx_u32(x568, x512, x551, &x570);
+ { uint32_t x573; uint8_t x574 = _addcarryx_u32(x571, x515, x554, &x573);
+ { uint32_t x576; uint8_t x577 = _addcarryx_u32(x574, x518, x557, &x576);
+ { uint32_t x579; uint8_t x580 = _addcarryx_u32(x577, x521, x559, &x579);
+ { uint8_t x581 = (x580 + x522);
+ { uint32_t x584; uint32_t x583 = _mulx_u32(x12, x15, &x584);
+ { uint32_t x587; uint32_t x586 = _mulx_u32(x12, x17, &x587);
+ { uint32_t x590; uint32_t x589 = _mulx_u32(x12, x19, &x590);
+ { uint32_t x593; uint32_t x592 = _mulx_u32(x12, x21, &x593);
+ { uint32_t x596; uint32_t x595 = _mulx_u32(x12, x23, &x596);
+ { uint32_t x599; uint32_t x598 = _mulx_u32(x12, x22, &x599);
+ { uint32_t x601; uint8_t x602 = _addcarryx_u32(0x0, x584, x586, &x601);
+ { uint32_t x604; uint8_t x605 = _addcarryx_u32(x602, x587, x589, &x604);
+ { uint32_t x607; uint8_t x608 = _addcarryx_u32(x605, x590, x592, &x607);
+ { uint32_t x610; uint8_t x611 = _addcarryx_u32(x608, x593, x595, &x610);
+ { uint32_t x613; uint8_t x614 = _addcarryx_u32(x611, x596, x598, &x613);
+ { uint32_t x616; uint8_t _ = _addcarryx_u32(0x0, x614, x599, &x616);
+ { uint32_t x619; uint8_t x620 = _addcarryx_u32(0x0, x564, x583, &x619);
+ { uint32_t x622; uint8_t x623 = _addcarryx_u32(x620, x567, x601, &x622);
+ { uint32_t x625; uint8_t x626 = _addcarryx_u32(x623, x570, x604, &x625);
+ { uint32_t x628; uint8_t x629 = _addcarryx_u32(x626, x573, x607, &x628);
+ { uint32_t x631; uint8_t x632 = _addcarryx_u32(x629, x576, x610, &x631);
+ { uint32_t x634; uint8_t x635 = _addcarryx_u32(x632, x579, x613, &x634);
+ { uint32_t x637; uint8_t x638 = _addcarryx_u32(x635, x581, x616, &x637);
+ { uint32_t _; uint32_t x640 = _mulx_u32(x619, 0xcccccccd, &_);
+ { uint32_t x644; uint32_t x643 = _mulx_u32(x640, 0xfffffffb, &x644);
+ { uint32_t x647; uint32_t x646 = _mulx_u32(x640, 0xffffffff, &x647);
+ { uint32_t x650; uint32_t x649 = _mulx_u32(x640, 0xffffffff, &x650);
+ { uint32_t x653; uint32_t x652 = _mulx_u32(x640, 0xffffffff, &x653);
+ { uint32_t x656; uint32_t x655 = _mulx_u32(x640, 0xffffffff, &x656);
+ { uint32_t x658, uint8_t x659 = Op (Syntax.MulSplit 32 (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x640, 0x3f);
+ { uint32_t x661; uint8_t x662 = _addcarryx_u32(0x0, x644, x646, &x661);
+ { uint32_t x664; uint8_t x665 = _addcarryx_u32(x662, x647, x649, &x664);
+ { uint32_t x667; uint8_t x668 = _addcarryx_u32(x665, x650, x652, &x667);
+ { uint32_t x670; uint8_t x671 = _addcarryx_u32(x668, x653, x655, &x670);
+ { uint32_t x673; uint8_t x674 = _addcarryx_u32(x671, x656, x658, &x673);
+ { uint8_t x675 = (x674 + x659);
+ { uint32_t _; uint8_t x678 = _addcarryx_u32(0x0, x619, x643, &_);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x622, x661, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x625, x664, &x683);
+ { uint32_t x686; uint8_t x687 = _addcarryx_u32(x684, x628, x667, &x686);
+ { uint32_t x689; uint8_t x690 = _addcarryx_u32(x687, x631, x670, &x689);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x634, x673, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x637, x675, &x695);
+ { uint8_t x697 = (x696 + x638);
+ { uint32_t x699; uint8_t x700 = _subborrow_u32(0x0, x680, 0xfffffffb, &x699);
+ { uint32_t x702; uint8_t x703 = _subborrow_u32(x700, x683, 0xffffffff, &x702);
+ { uint32_t x705; uint8_t x706 = _subborrow_u32(x703, x686, 0xffffffff, &x705);
+ { uint32_t x708; uint8_t x709 = _subborrow_u32(x706, x689, 0xffffffff, &x708);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(x709, x692, 0xffffffff, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0x3f, &x714);
+ { uint32_t _; uint8_t x718 = _subborrow_u32(x715, x697, 0x0, &_);
+ { uint32_t x719 = cmovznz(x718, x714, x695);
+ { uint32_t x720 = cmovznz(x718, x711, x692);
+ { uint32_t x721 = cmovznz(x718, x708, x689);
+ { uint32_t x722 = cmovznz(x718, x705, x686);
+ { uint32_t x723 = cmovznz(x718, x702, x683);
+ { uint32_t x724 = cmovznz(x718, x699, x680);
+ out[0] = x724;
+ out[1] = x723;
+ out[2] = x722;
+ out[3] = x721;
+ out[4] = x720;
+ out[5] = x719;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e166m5/fenz.c b/src/Specific/montgomery32_2e166m5/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e166m5/fenz.c
+++ b/src/Specific/montgomery32_2e166m5/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9);
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e166m5/feopp.c b/src/Specific/montgomery32_2e166m5/feopp.c
index 38900e439..fcdca608c 100644
--- a/src/Specific/montgomery32_2e166m5/feopp.c
+++ b/src/Specific/montgomery32_2e166m5/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xfffffffb);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint8_t x50 = ((uint8_t)x29 & 0x3f);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xfffffffb);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint8_t x50 = ((uint8_t)x29 & 0x3f);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e166m5/fesub.c b/src/Specific/montgomery32_2e166m5/fesub.c
index 9b4fcba2d..dd8a1a19f 100644
--- a/src/Specific/montgomery32_2e166m5/fesub.c
+++ b/src/Specific/montgomery32_2e166m5/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xfffffffb);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint8_t x63 = ((uint8_t)x42 & 0x3f);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
+ { uint32_t x43 = (x42 & 0xfffffffb);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint8_t x63 = ((uint8_t)x42 & 0x3f);
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e171m19/feadd.c b/src/Specific/montgomery32_2e171m19/feadd.c
index 6d641caaf..6f51fc45e 100644
--- a/src/Specific/montgomery32_2e171m19/feadd.c
+++ b/src/Specific/montgomery32_2e171m19/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffed, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x7ff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffed, &x43);
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x7ff, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
+ { uint32_t x63 = cmovznz(x62, x58, x40);
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e171m19/femul.c b/src/Specific/montgomery32_2e171m19/femul.c
index c8fe55921..f1809e352 100644
--- a/src/Specific/montgomery32_2e171m19/femul.c
+++ b/src/Specific/montgomery32_2e171m19/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0x286bca1b, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffed, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x7ff, &x80);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
-{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
-{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0x286bca1b, &_);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffed, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x7ff, &x197);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
-{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
-{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
-{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
-{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
-{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
-{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
-{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
-{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0x286bca1b, &_);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffed, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x7ff, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
-{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
-{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
-{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
-{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
-{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
-{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
-{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
-{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0x286bca1b, &_);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffed, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
-{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
-{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x7ff, &x433);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
-{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
-{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
-{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
-{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
-{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
-{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0x286bca1b, &_);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffed, &x536);
-{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
-{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
-{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
-{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
-{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x7ff, &x551);
-{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
-{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
-{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
-{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
-{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
-{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
-{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
-{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
-{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
-{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
-{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
-{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
-{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
-{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
-{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
-{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
-{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
-{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
-{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
-{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
-{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
-{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0x286bca1b, &_);
-{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffed, &x654);
-{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
-{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
-{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
-{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
-{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x7ff, &x669);
-{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
-{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
-{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
-{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
-{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
-{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
-{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
-{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
-{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffed, &x711);
-{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
-{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
-{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
-{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
-{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x7ff, &x726);
-{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
-{ uint32_t x731 = cmovznz(x730, x726, x707);
-{ uint32_t x732 = cmovznz(x730, x723, x704);
-{ uint32_t x733 = cmovznz(x730, x720, x701);
-{ uint32_t x734 = cmovznz(x730, x717, x698);
-{ uint32_t x735 = cmovznz(x730, x714, x695);
-{ uint32_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0x286bca1b, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffed, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x7ff, &x80);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+ { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+ { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0x286bca1b, &_);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffed, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x7ff, &x197);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+ { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+ { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+ { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+ { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+ { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+ { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+ { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+ { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0x286bca1b, &_);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffed, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x7ff, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+ { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+ { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+ { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+ { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+ { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+ { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0x286bca1b, &_);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffed, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+ { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x7ff, &x433);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+ { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+ { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+ { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+ { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+ { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+ { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0x286bca1b, &_);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffed, &x536);
+ { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+ { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+ { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+ { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+ { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x7ff, &x551);
+ { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+ { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+ { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+ { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+ { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+ { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+ { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+ { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+ { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
+ { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+ { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+ { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+ { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+ { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+ { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+ { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+ { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+ { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+ { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+ { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+ { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+ { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0x286bca1b, &_);
+ { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffed, &x654);
+ { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+ { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+ { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+ { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+ { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x7ff, &x669);
+ { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+ { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+ { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+ { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+ { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+ { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+ { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffed, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+ { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+ { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+ { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+ { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x7ff, &x726);
+ { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
+ { uint32_t x731 = cmovznz(x730, x726, x707);
+ { uint32_t x732 = cmovznz(x730, x723, x704);
+ { uint32_t x733 = cmovznz(x730, x720, x701);
+ { uint32_t x734 = cmovznz(x730, x717, x698);
+ { uint32_t x735 = cmovznz(x730, x714, x695);
+ { uint32_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e171m19/fenz.c b/src/Specific/montgomery32_2e171m19/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e171m19/fenz.c
+++ b/src/Specific/montgomery32_2e171m19/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) { /* Nonzero test: out[0] receives the bitwise OR of the six words of in1, so it is 0 only when every input word is 0. */
+ { const uint32_t x9 = in1[5]; /* NOTE(review): `ReturnType` in the signature is not defined in the visible code; presumably a generator marker or a macro supplied elsewhere -- confirm. */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* all six input words are now held in locals */
+ { uint32_t x11 = (x10 | x9); /* start folding: in1[4] | in1[5] */
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14); /* x15 = OR of in1[0] .. in1[5] */
+ out[0] = x15; /* single-word result */
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e171m19/feopp.c b/src/Specific/montgomery32_2e171m19/feopp.c
index 79748f21c..90528287a 100644
--- a/src/Specific/montgomery32_2e171m19/feopp.c
+++ b/src/Specific/montgomery32_2e171m19/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xffffffed);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0x7ff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) { /* Negation: computes 0 - in1 over six 32-bit words, then adds a six-word constant masked by x29 (derived from the last borrow) and stores the six result words in out. */
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* x2 is the word the borrow chain starts from */
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); /* 0 - in1[0]; the returned value (presumably the borrow-out, as in the Intel intrinsic of this name) feeds the next call */
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); /* x28 is the value returned for the last (in1[5]) word */
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); /* cmovznz is defined outside this view; presumably yields 0 when x28 == 0 and 0xffffffff otherwise -- confirm */
+ { uint32_t x30 = (x29 & 0xffffffed); /* masked constants 0xffffffed, 0xffffffff x4, 0x7ff: as 32-bit words in chain order they spell 2^171 - 19 (cf. directory montgomery32_2e171m19) */
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); /* add the masked constant to the difference, chaining x33 into the next addition */
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0x7ff);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); /* the value returned by the last addition is stored in _ and never read */
+ out[0] = x32; /* x32 was built from the in1[0] word, so out[] keeps the input word order */
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e171m19/fesub.c b/src/Specific/montgomery32_2e171m19/fesub.c
index c47be818c..6d10dffd7 100644
--- a/src/Specific/montgomery32_2e171m19/fesub.c
+++ b/src/Specific/montgomery32_2e171m19/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xffffffed);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0x7ff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* Subtraction: computes in1 - in2 over six 32-bit words, then adds a six-word constant masked by x42 (derived from the last borrow) and stores the six result words in out. */
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0]; /* both operands are now held in locals; the chain below starts at index 0 */
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); /* in1[0] - in2[0]; the returned value (presumably the borrow-out, as in the Intel intrinsic of this name) feeds the next call */
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); /* x41 is the value returned for the last (index 5) word pair */
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); /* cmovznz is defined outside this view; presumably yields 0 when x41 == 0 and 0xffffffff otherwise -- confirm */
+ { uint32_t x43 = (x42 & 0xffffffed); /* masked constants 0xffffffed, 0xffffffff x4, 0x7ff: as 32-bit words in chain order they spell 2^171 - 19 (cf. directory montgomery32_2e171m19) */
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); /* add the masked constant to the difference, chaining x46 into the next addition */
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0x7ff);
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); /* the value returned by the last addition is stored in _ and never read */
+ out[0] = x45; /* x45 was built from the index-0 words, so out[] keeps the input word order */
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m17/feadd.c b/src/Specific/montgomery32_2e174m17/feadd.c
index 0190a5e63..2c1967002 100644
--- a/src/Specific/montgomery32_2e174m17/feadd.c
+++ b/src/Specific/montgomery32_2e174m17/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffef, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* Addition: computes in1 + in2 over six 32-bit words, trial-subtracts a six-word constant, then picks per word between the raw sum and the subtracted value and stores the result in out. */
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0]; /* both operands are now held in locals; the chain below starts at index 0 */
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); /* in1[0] + in2[0]; the returned value (presumably the carry-out, as in the Intel intrinsic of this name) feeds the next call */
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); /* x41 is the value returned for the last (index 5) word pair */
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffef, &x43); /* trial subtraction of 0xffffffef, 0xffffffff x4, 0x3fff: as 32-bit words in chain order they spell 2^174 - 17 (cf. directory montgomery32_2e174m17) */
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); /* extends the chain over the addition's x41; the difference word goes to _ and is never read, only x62 is kept */
+ { uint32_t x63 = cmovznz(x62, x58, x40); /* cmovznz is defined outside this view; presumably returns its 2nd argument when x62 == 0 and its 3rd otherwise, i.e. keeps the raw sum if the trial subtraction borrowed -- confirm */
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25); /* x68 is the selection for the index-0 word */
+ out[0] = x68; /* x68 was built from the index-0 words, so out[] keeps the input word order */
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m17/femul.c b/src/Specific/montgomery32_2e174m17/femul.c
index 7b1f772c8..e713bda8f 100644
--- a/src/Specific/montgomery32_2e174m17/femul.c
+++ b/src/Specific/montgomery32_2e174m17/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xf0f0f0f1, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffef, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
-{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
-{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xf0f0f0f1, &_);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffef, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
-{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
-{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
-{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
-{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
-{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
-{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
-{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
-{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xf0f0f0f1, &_);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffef, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
-{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
-{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
-{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
-{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
-{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
-{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
-{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
-{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xf0f0f0f1, &_);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffef, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
-{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
-{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
-{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
-{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
-{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
-{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
-{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
-{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xf0f0f0f1, &_);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffef, &x536);
-{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
-{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
-{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
-{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
-{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551);
-{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
-{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
-{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
-{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
-{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
-{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
-{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
-{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
-{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
-{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
-{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
-{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
-{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
-{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
-{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
-{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
-{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
-{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
-{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
-{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
-{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
-{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xf0f0f0f1, &_);
-{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffef, &x654);
-{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
-{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
-{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
-{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
-{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669);
-{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
-{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
-{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
-{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
-{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
-{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
-{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
-{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
-{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffef, &x711);
-{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
-{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
-{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
-{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
-{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726);
-{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
-{ uint32_t x731 = cmovznz(x730, x726, x707);
-{ uint32_t x732 = cmovznz(x730, x723, x704);
-{ uint32_t x733 = cmovznz(x730, x720, x701);
-{ uint32_t x734 = cmovznz(x730, x717, x698);
-{ uint32_t x735 = cmovznz(x730, x714, x695);
-{ uint32_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xf0f0f0f1, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffef, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+ { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+ { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xf0f0f0f1, &_);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffef, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+ { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+ { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+ { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+ { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+ { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+ { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+ { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+ { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xf0f0f0f1, &_);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffef, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+ { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+ { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+ { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+ { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+ { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+ { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xf0f0f0f1, &_);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffef, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+ { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+ { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+ { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+ { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+ { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+ { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+ { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xf0f0f0f1, &_);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffef, &x536);
+ { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+ { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+ { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+ { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+ { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551);
+ { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+ { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+ { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+ { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+ { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+ { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+ { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+ { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+ { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
+ { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+ { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+ { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+ { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+ { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+ { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+ { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+ { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+ { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+ { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+ { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+ { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+ { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xf0f0f0f1, &_);
+ { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffef, &x654);
+ { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+ { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+ { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+ { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+ { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669);
+ { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+ { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+ { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+ { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+ { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+ { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+ { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffef, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+ { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+ { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+ { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+ { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726);
+ { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
+ { uint32_t x731 = cmovznz(x730, x726, x707);
+ { uint32_t x732 = cmovznz(x730, x723, x704);
+ { uint32_t x733 = cmovznz(x730, x720, x701);
+ { uint32_t x734 = cmovznz(x730, x717, x698);
+ { uint32_t x735 = cmovznz(x730, x714, x695);
+ { uint32_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m17/fenz.c b/src/Specific/montgomery32_2e174m17/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e174m17/fenz.c
+++ b/src/Specific/montgomery32_2e174m17/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9);
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m17/feopp.c b/src/Specific/montgomery32_2e174m17/feopp.c
index a5c384418..c4e407fe1 100644
--- a/src/Specific/montgomery32_2e174m17/feopp.c
+++ b/src/Specific/montgomery32_2e174m17/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xffffffef);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0x3fff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xffffffef);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0x3fff);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m17/fesub.c b/src/Specific/montgomery32_2e174m17/fesub.c
index 1b2b1408e..0e395b59f 100644
--- a/src/Specific/montgomery32_2e174m17/fesub.c
+++ b/src/Specific/montgomery32_2e174m17/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xffffffef);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0x3fff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
+ { uint32_t x43 = (x42 & 0xffffffef);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0x3fff);
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m3/feadd.c b/src/Specific/montgomery32_2e174m3/feadd.c
index ba1890ee6..d0c417ed7 100644
--- a/src/Specific/montgomery32_2e174m3/feadd.c
+++ b/src/Specific/montgomery32_2e174m3/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffffd, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffffd, &x43);
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
+ { uint32_t x63 = cmovznz(x62, x58, x40);
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m3/femul.c b/src/Specific/montgomery32_2e174m3/femul.c
index ca3165c2d..4f0a64e4e 100644
--- a/src/Specific/montgomery32_2e174m3/femul.c
+++ b/src/Specific/montgomery32_2e174m3/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xaaaaaaab, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffffd, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
-{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
-{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xaaaaaaab, &_);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffffd, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
-{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
-{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
-{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
-{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
-{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
-{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
-{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
-{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xaaaaaaab, &_);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffffd, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
-{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
-{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
-{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
-{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
-{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
-{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
-{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
-{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xaaaaaaab, &_);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffffd, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
-{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
-{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
-{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
-{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
-{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
-{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
-{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
-{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xaaaaaaab, &_);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffffd, &x536);
-{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
-{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
-{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
-{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
-{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551);
-{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
-{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
-{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
-{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
-{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
-{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
-{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
-{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
-{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
-{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
-{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
-{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
-{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
-{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
-{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
-{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
-{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
-{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
-{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
-{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
-{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
-{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xaaaaaaab, &_);
-{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffffd, &x654);
-{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
-{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
-{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
-{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
-{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669);
-{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
-{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
-{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
-{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
-{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
-{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
-{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
-{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
-{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffffd, &x711);
-{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
-{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
-{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
-{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
-{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726);
-{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
-{ uint32_t x731 = cmovznz(x730, x726, x707);
-{ uint32_t x732 = cmovznz(x730, x723, x704);
-{ uint32_t x733 = cmovznz(x730, x720, x701);
-{ uint32_t x734 = cmovznz(x730, x717, x698);
-{ uint32_t x735 = cmovznz(x730, x714, x695);
-{ uint32_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xaaaaaaab, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffffd, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+ { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+ { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xaaaaaaab, &_);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffffd, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+ { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+ { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+ { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+ { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+ { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+ { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+ { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+ { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xaaaaaaab, &_);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffffd, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+ { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+ { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+ { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+ { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+ { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+ { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xaaaaaaab, &_);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffffd, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+ { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+ { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+ { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+ { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+ { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+ { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+ { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xaaaaaaab, &_);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffffd, &x536);
+ { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+ { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+ { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+ { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+ { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551);
+ { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+ { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+ { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+ { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+ { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+ { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+ { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+ { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+ { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
+ { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+ { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+ { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+ { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+ { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+ { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+ { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+ { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+ { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+ { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+ { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+ { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+ { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xaaaaaaab, &_);
+ { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffffd, &x654);
+ { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+ { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+ { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+ { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+ { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669);
+ { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+ { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+ { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+ { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+ { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+ { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+ { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffffd, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+ { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+ { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+ { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+ { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726);
+ { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
+ { uint32_t x731 = cmovznz(x730, x726, x707);
+ { uint32_t x732 = cmovznz(x730, x723, x704);
+ { uint32_t x733 = cmovznz(x730, x720, x701);
+ { uint32_t x734 = cmovznz(x730, x717, x698);
+ { uint32_t x735 = cmovznz(x730, x714, x695);
+ { uint32_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m3/fenz.c b/src/Specific/montgomery32_2e174m3/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e174m3/fenz.c
+++ b/src/Specific/montgomery32_2e174m3/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) { /* fenz: bitwise-OR the six words of in1 into out[0], so out[0] == 0 exactly when every in1[i] == 0. NOTE(review): ReturnType is not defined in the visible part of this file -- presumably a macro supplied by whatever includes this code; confirm. */
+ { const uint32_t x9 = in1[5]; /* copy the six input words into locals (names are generator-assigned) */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9); /* begin the OR accumulation with in1[4] | in1[5] */
+ { uint32_t x12 = (x8 | x11); /* fold in in1[3] */
+ { uint32_t x13 = (x6 | x12); /* fold in in1[2] */
+ { uint32_t x14 = (x4 | x13); /* fold in in1[1] */
+ { uint32_t x15 = (x2 | x14); /* fold in in1[0]: x15 is now the OR of all six words */
+ out[0] = x15; /* the only store this function performs */
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m3/feopp.c b/src/Specific/montgomery32_2e174m3/feopp.c
index fd49d0c28..d8146fb04 100644
--- a/src/Specific/montgomery32_2e174m3/feopp.c
+++ b/src/Specific/montgomery32_2e174m3/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xfffffffd);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0x3fff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) { /* feopp: subtract the six-word value in1 from zero with a borrow chain starting at in1[0], then add back a masked six-word constant; result words go to out[0..5] in the same word order. */
+ { const uint32_t x9 = in1[5]; /* copy the six input words into locals (names are generator-assigned) */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); /* 0 - in1[0] with no incoming borrow; x13 is the outgoing borrow (assumes the Intel intrinsic argument convention: borrow_in, a, b, &difference) */
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15); /* each following word consumes the previous borrow */
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); /* x28 = borrow out of the top word */
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); /* cmovznz is defined outside this view; presumably yields 0xffffffff when x28 is nonzero and 0 otherwise, making x29 an all-ones/all-zeros mask -- confirm */
+ { uint32_t x30 = (x29 & 0xfffffffd); /* the six masked constants, read with word 0 first, are the 32-bit words of 2^174 - 3 (matches the 2e174m3 directory name) */
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); /* add masked word 0, starting a carry chain */
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0x3fff); /* top word of the constant: only 14 bits set */
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); /* final carry is bound to `_` and never read */
+ out[0] = x32; /* x32 came from in1[0], so output word i corresponds to input word i */
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e174m3/fesub.c b/src/Specific/montgomery32_2e174m3/fesub.c
index 111a2f165..560115eb6 100644
--- a/src/Specific/montgomery32_2e174m3/fesub.c
+++ b/src/Specific/montgomery32_2e174m3/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xfffffffd);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0x3fff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* fesub: word-wise in1 - in2 with a borrow chain starting at index 0, then add back a masked six-word constant; result words go to out[0..5] in the same word order. */
+ { const uint32_t x12 = in1[5]; /* copy the six words of in1 into locals (names are generator-assigned) */
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; /* copy the six words of in2 into locals */
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); /* in1[0] - in2[0] with no incoming borrow; x26 is the outgoing borrow (assumes the Intel intrinsic argument convention: borrow_in, a, b, &difference) */
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28); /* each following word consumes the previous borrow */
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); /* x41 = borrow out of the top word */
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); /* cmovznz is defined outside this view; presumably yields 0xffffffff when x41 is nonzero and 0 otherwise, making x42 an all-ones/all-zeros mask -- confirm */
+ { uint32_t x43 = (x42 & 0xfffffffd); /* the six masked constants, read with word 0 first, are the 32-bit words of 2^174 - 3 (matches the 2e174m3 directory name) */
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); /* add masked word 0, starting a carry chain */
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0x3fff); /* top word of the constant: only 14 bits set */
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); /* final carry is bound to `_` and never read */
+ out[0] = x45; /* x45 came from in1[0]/in2[0], so output word i corresponds to input word i */
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e189m25/feadd.c b/src/Specific/montgomery32_2e189m25/feadd.c
index c33b5cae7..e8ad28bbe 100644
--- a/src/Specific/montgomery32_2e189m25/feadd.c
+++ b/src/Specific/montgomery32_2e189m25/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffe7, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x1fffffff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* feadd: word-wise in1 + in2 with a carry chain starting at index 0, then a trial subtraction of a six-word constant; each output word is chosen between the raw sum and the reduced value via cmovznz. */
+ { const uint32_t x12 = in1[5]; /* copy the six words of in1 into locals (names are generator-assigned) */
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; /* copy the six words of in2 into locals */
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); /* in1[0] + in2[0] with no incoming carry; x26 is the outgoing carry (assumes the Intel intrinsic argument convention: carry_in, a, b, &sum) */
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28); /* each following word consumes the previous carry */
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); /* x41 = carry out of the top word, kept as a seventh "word" of the sum */
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffe7, &x43); /* trial subtraction: the six constants, read with word 0 first, are the 32-bit words of 2^189 - 25 (matches the 2e189m25 directory name) */
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x1fffffff, &x58); /* top word of the constant: only 29 bits set */
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); /* propagate the borrow through the carry word; only the resulting borrow x62 is kept, the difference is discarded */
+ { uint32_t x63 = cmovznz(x62, x58, x40); /* cmovznz is defined outside this view; presumably returns the 2nd argument when x62 == 0 and the 3rd otherwise, i.e. keeps the raw sum word when the trial subtraction borrowed -- confirm */
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68; /* x68 is selected from the word-0 values (x43/x25), so output word i corresponds to input word i */
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e189m25/femul.c b/src/Specific/montgomery32_2e189m25/femul.c
index ce597761b..416fe47bd 100644
--- a/src/Specific/montgomery32_2e189m25/femul.c
+++ b/src/Specific/montgomery32_2e189m25/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xc28f5c29, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffe7, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x1fffffff, &x80);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
-{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
-{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xc28f5c29, &_);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffe7, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x1fffffff, &x197);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
-{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
-{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
-{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
-{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
-{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
-{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
-{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
-{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xc28f5c29, &_);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffe7, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x1fffffff, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
-{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
-{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
-{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
-{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
-{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
-{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
-{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
-{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xc28f5c29, &_);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffe7, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
-{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
-{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x1fffffff, &x433);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
-{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
-{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
-{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
-{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
-{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
-{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xc28f5c29, &_);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffe7, &x536);
-{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
-{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
-{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
-{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
-{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x1fffffff, &x551);
-{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
-{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
-{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
-{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
-{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
-{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
-{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
-{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
-{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
-{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
-{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
-{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
-{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
-{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
-{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
-{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
-{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
-{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
-{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
-{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
-{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
-{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xc28f5c29, &_);
-{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffe7, &x654);
-{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
-{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
-{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
-{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
-{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x1fffffff, &x669);
-{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
-{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
-{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
-{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
-{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
-{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
-{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
-{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
-{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffe7, &x711);
-{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
-{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
-{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
-{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
-{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x1fffffff, &x726);
-{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
-{ uint32_t x731 = cmovznz(x730, x726, x707);
-{ uint32_t x732 = cmovznz(x730, x723, x704);
-{ uint32_t x733 = cmovznz(x730, x720, x701);
-{ uint32_t x734 = cmovznz(x730, x717, x698);
-{ uint32_t x735 = cmovznz(x730, x714, x695);
-{ uint32_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xc28f5c29, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffe7, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x1fffffff, &x80);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+ { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+ { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xc28f5c29, &_);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffe7, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x1fffffff, &x197);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+ { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+ { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+ { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+ { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+ { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+ { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+ { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+ { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xc28f5c29, &_);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffe7, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x1fffffff, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+ { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+ { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+ { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+ { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+ { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+ { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xc28f5c29, &_);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffe7, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+ { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x1fffffff, &x433);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+ { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+ { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+ { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+ { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+ { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+ { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xc28f5c29, &_);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffe7, &x536);
+ { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+ { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+ { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+ { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+ { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x1fffffff, &x551);
+ { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+ { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+ { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+ { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+ { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+ { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+ { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+ { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+ { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
+ { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+ { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+ { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+ { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+ { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+ { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+ { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+ { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+ { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+ { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+ { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+ { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+ { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xc28f5c29, &_);
+ { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffe7, &x654);
+ { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+ { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+ { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+ { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+ { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x1fffffff, &x669);
+ { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+ { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+ { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+ { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+ { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+ { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+ { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffe7, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+ { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+ { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+ { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+ { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x1fffffff, &x726);
+ { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
+ { uint32_t x731 = cmovznz(x730, x726, x707);
+ { uint32_t x732 = cmovznz(x730, x723, x704);
+ { uint32_t x733 = cmovznz(x730, x720, x701);
+ { uint32_t x734 = cmovznz(x730, x717, x698);
+ { uint32_t x735 = cmovznz(x730, x714, x695);
+ { uint32_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e189m25/fenz.c b/src/Specific/montgomery32_2e189m25/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e189m25/fenz.c
+++ b/src/Specific/montgomery32_2e189m25/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9);
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e189m25/feopp.c b/src/Specific/montgomery32_2e189m25/feopp.c
index e9276231c..70b81e9b8 100644
--- a/src/Specific/montgomery32_2e189m25/feopp.c
+++ b/src/Specific/montgomery32_2e189m25/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xffffffe7);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0x1fffffff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xffffffe7);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0x1fffffff);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e189m25/fesub.c b/src/Specific/montgomery32_2e189m25/fesub.c
index 80ae99e51..3b721c799 100644
--- a/src/Specific/montgomery32_2e189m25/fesub.c
+++ b/src/Specific/montgomery32_2e189m25/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xffffffe7);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0x1fffffff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { // Writes to out the six-limb result of (in1 - in2), followed by a masked add of the constants below that depends on the final borrow.
+ { const uint32_t x12 = in1[5]; // in1[5] / in2[5] are consumed last in the borrow chain below, i.e. they are the top limbs of the chain
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0]; // in1[0] / in2[0] are consumed first in the borrow chain
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // chain start, borrow-in 0; NOTE(review): operand order assumed to follow the Intel intrinsic (in1 limb minus in2 limb) — confirm for the compiler in use
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28); // each step feeds the previous step's borrow-out in as borrow-in
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); // x41 is the borrow out of the whole six-limb subtraction
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask derived from x41; NOTE(review): presumably 0 when x41 == 0 and all-ones otherwise — cmovznz is defined outside this chunk, confirm
+ { uint32_t x43 = (x42 & 0xffffffe7); // the six AND constants (0xffffffe7, 0xffffffff x4, 0x1fffffff), read as 32-bit limbs from least to most significant, equal 2^189 - 25
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); // carry chain start: difference limb 0 plus masked constant limb 0
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0x1fffffff); // top constant limb
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // the final carry-out is stored in `_` and never read
+ out[0] = x45; // out[i] receives the sum computed from in1[i] and in2[i], so output limb order matches input limb order
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e190m11/feadd.c b/src/Specific/montgomery32_2e190m11/feadd.c
index 5b9de603a..95eaecbb3 100644
--- a/src/Specific/montgomery32_2e190m11/feadd.c
+++ b/src/Specific/montgomery32_2e190m11/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffff5, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fffffff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { // Adds in1 and in2 limb by limb, also computes that sum minus the constants below, and writes to out a per-limb cmovznz selection between the two.
+ { const uint32_t x12 = in1[5]; // in1[5] / in2[5] are consumed last in the carry chain below, i.e. they are the top limbs of the chain
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0]; // in1[0] / in2[0] are consumed first in the carry chain
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // carry chain start (carry-in 0): raw sum limb 0
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28); // each step feeds the previous step's carry-out in as carry-in
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // x41 is the carry out of the six-limb addition
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffff5, &x43); // borrow chain start; the six subtracted constants (0xfffffff5, 0xffffffff x4, 0x3fffffff), read as 32-bit limbs from least to most significant, equal 2^190 - 11
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46); // NOTE(review): operand order assumed to follow the Intel intrinsic (sum limb minus constant) — confirm for the compiler in use
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fffffff, &x58); // top constant limb
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // extends the borrow chain through the addition's carry-out x41; the difference word `_` is never read, only the final borrow x62 is kept
+ { uint32_t x63 = cmovznz(x62, x58, x40); // limb 5: choose between subtracted (x58) and raw (x40) sum; NOTE(review): presumably the second argument is taken when x62 == 0, else the third — cmovznz is defined outside this chunk, confirm
+ { uint32_t x64 = cmovznz(x62, x55, x37); // limb 4
+ { uint32_t x65 = cmovznz(x62, x52, x34); // limb 3
+ { uint32_t x66 = cmovznz(x62, x49, x31); // limb 2
+ { uint32_t x67 = cmovznz(x62, x46, x28); // limb 1
+ { uint32_t x68 = cmovznz(x62, x43, x25); // limb 0
+ out[0] = x68; // x68..x63 were produced from limb 5 down to limb 0, so they are stored in reverse to give out[i] the value built from in1[i] and in2[i]
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e190m11/femul.c b/src/Specific/montgomery32_2e190m11/femul.c
index 5305f497b..456c753e9 100644
--- a/src/Specific/montgomery32_2e190m11/femul.c
+++ b/src/Specific/montgomery32_2e190m11/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xba2e8ba3, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffff5, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fffffff, &x80);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
-{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
-{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xba2e8ba3, &_);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffff5, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fffffff, &x197);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
-{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
-{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
-{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
-{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
-{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
-{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
-{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
-{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xba2e8ba3, &_);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffff5, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fffffff, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
-{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
-{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
-{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
-{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
-{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
-{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
-{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
-{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xba2e8ba3, &_);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffff5, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
-{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
-{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fffffff, &x433);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
-{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
-{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
-{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
-{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
-{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
-{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xba2e8ba3, &_);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffff5, &x536);
-{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
-{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
-{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
-{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
-{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fffffff, &x551);
-{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
-{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
-{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
-{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
-{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
-{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
-{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
-{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
-{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
-{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
-{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
-{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
-{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
-{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
-{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
-{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
-{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
-{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
-{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
-{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
-{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
-{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xba2e8ba3, &_);
-{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffff5, &x654);
-{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
-{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
-{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
-{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
-{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fffffff, &x669);
-{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
-{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
-{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
-{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
-{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
-{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
-{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
-{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
-{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffff5, &x711);
-{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
-{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
-{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
-{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
-{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fffffff, &x726);
-{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
-{ uint32_t x731 = cmovznz(x730, x726, x707);
-{ uint32_t x732 = cmovznz(x730, x723, x704);
-{ uint32_t x733 = cmovznz(x730, x720, x701);
-{ uint32_t x734 = cmovznz(x730, x717, x698);
-{ uint32_t x735 = cmovznz(x730, x714, x695);
-{ uint32_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xba2e8ba3, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffff5, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fffffff, &x80);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+ { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+ { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xba2e8ba3, &_);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffff5, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fffffff, &x197);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+ { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+ { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+ { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+ { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+ { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+ { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+ { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+ { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xba2e8ba3, &_);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffff5, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fffffff, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+ { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+ { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+ { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+ { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+ { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+ { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xba2e8ba3, &_);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffff5, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+ { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fffffff, &x433);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+ { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+ { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+ { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+ { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+ { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+ { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xba2e8ba3, &_);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffff5, &x536);
+ { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+ { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+ { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+ { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+ { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fffffff, &x551);
+ { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+ { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+ { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+ { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+ { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+ { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+ { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+ { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+ { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
+ { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+ { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+ { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+ { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+ { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+ { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+ { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+ { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+ { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+ { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+ { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+ { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+ { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xba2e8ba3, &_);
+ { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffff5, &x654);
+ { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+ { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+ { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+ { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+ { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fffffff, &x669);
+ { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+ { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+ { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+ { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+ { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+ { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+ { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffff5, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+ { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+ { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+ { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+ { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fffffff, &x726);
+ { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
+ { uint32_t x731 = cmovznz(x730, x726, x707);
+ { uint32_t x732 = cmovznz(x730, x723, x704);
+ { uint32_t x733 = cmovznz(x730, x720, x701);
+ { uint32_t x734 = cmovznz(x730, x717, x698);
+ { uint32_t x735 = cmovznz(x730, x714, x695);
+ { uint32_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e190m11/fenz.c b/src/Specific/montgomery32_2e190m11/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e190m11/fenz.c
+++ b/src/Specific/montgomery32_2e190m11/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9);
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e190m11/feopp.c b/src/Specific/montgomery32_2e190m11/feopp.c
index c888de137..dd804dde7 100644
--- a/src/Specific/montgomery32_2e190m11/feopp.c
+++ b/src/Specific/montgomery32_2e190m11/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xfffffff5);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0x3fffffff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xfffffff5);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0x3fffffff);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e190m11/fesub.c b/src/Specific/montgomery32_2e190m11/fesub.c
index c51ae4448..6eab2592e 100644
--- a/src/Specific/montgomery32_2e190m11/fesub.c
+++ b/src/Specific/montgomery32_2e190m11/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xfffffff5);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0x3fffffff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
+ { uint32_t x43 = (x42 & 0xfffffff5);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0x3fffffff);
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e191m19/feadd.c b/src/Specific/montgomery32_2e191m19/feadd.c
index 53b3f7549..33f9ebc5c 100644
--- a/src/Specific/montgomery32_2e191m19/feadd.c
+++ b/src/Specific/montgomery32_2e191m19/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffed, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x7fffffff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffed, &x43);
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x7fffffff, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
+ { uint32_t x63 = cmovznz(x62, x58, x40);
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e191m19/femul.c b/src/Specific/montgomery32_2e191m19/femul.c
index fdb2f44df..ebb0ee97e 100644
--- a/src/Specific/montgomery32_2e191m19/femul.c
+++ b/src/Specific/montgomery32_2e191m19/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0x286bca1b, &_);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffed, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
-{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x7fffffff, &x80);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
-{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
-{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0x286bca1b, &_);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffed, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x7fffffff, &x197);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
-{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
-{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
-{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
-{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
-{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
-{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
-{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
-{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
-{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
-{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
-{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0x286bca1b, &_);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffed, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
-{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
-{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
-{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x7fffffff, &x315);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
-{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
-{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
-{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
-{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
-{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
-{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
-{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
-{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
-{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
-{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
-{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
-{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
-{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
-{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0x286bca1b, &_);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffed, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
-{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
-{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
-{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
-{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x7fffffff, &x433);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
-{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
-{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
-{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
-{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
-{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
-{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
-{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
-{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
-{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0x286bca1b, &_);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffed, &x536);
-{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
-{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
-{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
-{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
-{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x7fffffff, &x551);
-{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
-{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
-{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
-{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
-{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
-{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
-{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
-{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
-{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
-{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
-{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
-{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
-{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
-{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
-{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
-{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
-{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
-{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
-{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
-{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
-{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
-{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0x286bca1b, &_);
-{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffed, &x654);
-{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
-{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
-{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
-{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
-{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x7fffffff, &x669);
-{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
-{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
-{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
-{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
-{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
-{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
-{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
-{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
-{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffed, &x711);
-{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
-{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
-{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
-{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
-{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x7fffffff, &x726);
-{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
-{ uint32_t x731 = cmovznz(x730, x726, x707);
-{ uint32_t x732 = cmovznz(x730, x723, x704);
-{ uint32_t x733 = cmovznz(x730, x720, x701);
-{ uint32_t x734 = cmovznz(x730, x717, x698);
-{ uint32_t x735 = cmovznz(x730, x714, x695);
-{ uint32_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+ { uint32_t _; uint32_t x61 = _mulx_u32(x25, 0x286bca1b, &_);
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffed, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+ { uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x7fffffff, &x80);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+ { uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+ { uint32_t _; uint32_t x178 = _mulx_u32(x157, 0x286bca1b, &_);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffed, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x7fffffff, &x197);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+ { uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+ { uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+ { uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+ { uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+ { uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+ { uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+ { uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+ { uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+ { uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+ { uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+ { uint32_t _; uint32_t x296 = _mulx_u32(x275, 0x286bca1b, &_);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffed, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+ { uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+ { uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+ { uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x7fffffff, &x315);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+ { uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+ { uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+ { uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+ { uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+ { uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+ { uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+ { uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+ { uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+ { uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+ { uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+ { uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+ { uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+ { uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+ { uint32_t _; uint32_t x414 = _mulx_u32(x393, 0x286bca1b, &_);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffed, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+ { uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+ { uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+ { uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+ { uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x7fffffff, &x433);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+ { uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+ { uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+ { uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+ { uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+ { uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+ { uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+ { uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+ { uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+ { uint32_t _; uint32_t x532 = _mulx_u32(x511, 0x286bca1b, &_);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffed, &x536);
+ { uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+ { uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+ { uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+ { uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+ { uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x7fffffff, &x551);
+ { uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+ { uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+ { uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+ { uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+ { uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+ { uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+ { uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+ { uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+ { uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);
+ { uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+ { uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+ { uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+ { uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+ { uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+ { uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+ { uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+ { uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+ { uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+ { uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+ { uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+ { uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+ { uint32_t _; uint32_t x650 = _mulx_u32(x629, 0x286bca1b, &_);
+ { uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffed, &x654);
+ { uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+ { uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+ { uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+ { uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+ { uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x7fffffff, &x669);
+ { uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+ { uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+ { uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+ { uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+ { uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+ { uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+ { uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+ { uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+ { uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffed, &x711);
+ { uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+ { uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+ { uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+ { uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+ { uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x7fffffff, &x726);
+ { uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);
+ { uint32_t x731 = cmovznz(x730, x726, x707);
+ { uint32_t x732 = cmovznz(x730, x723, x704);
+ { uint32_t x733 = cmovznz(x730, x720, x701);
+ { uint32_t x734 = cmovznz(x730, x717, x698);
+ { uint32_t x735 = cmovznz(x730, x714, x695);
+ { uint32_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e191m19/fenz.c b/src/Specific/montgomery32_2e191m19/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e191m19/fenz.c
+++ b/src/Specific/montgomery32_2e191m19/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9);
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e191m19/feopp.c b/src/Specific/montgomery32_2e191m19/feopp.c
index e27f085de..b72f54a6c 100644
--- a/src/Specific/montgomery32_2e191m19/feopp.c
+++ b/src/Specific/montgomery32_2e191m19/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xffffffed);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xffffffff);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0x7fffffff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xffffffed);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xffffffff);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0x7fffffff);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e191m19/fesub.c b/src/Specific/montgomery32_2e191m19/fesub.c
index ff4740345..c722fc6d3 100644
--- a/src/Specific/montgomery32_2e191m19/fesub.c
+++ b/src/Specific/montgomery32_2e191m19/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xffffffed);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xffffffff);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0x7fffffff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
+ { uint32_t x43 = (x42 & 0xffffffed);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xffffffff);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0x7fffffff);
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e192m2e64m1/feadd.c b/src/Specific/montgomery32_2e192m2e64m1/feadd.c
index 555a4f75f..ffa2edd13 100644
--- a/src/Specific/montgomery32_2e192m2e64m1/feadd.c
+++ b/src/Specific/montgomery32_2e192m2e64m1/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
-{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffff, &x43);
-{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
-{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xfffffffe, &x49);
-{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
-{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
-{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0xffffffff, &x58);
-{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
-{ uint32_t x63 = cmovznz(x62, x58, x40);
-{ uint32_t x64 = cmovznz(x62, x55, x37);
-{ uint32_t x65 = cmovznz(x62, x52, x34);
-{ uint32_t x66 = cmovznz(x62, x49, x31);
-{ uint32_t x67 = cmovznz(x62, x46, x28);
-{ uint32_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);
+ { uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
+ { uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffff, &x43);
+ { uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+ { uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xfffffffe, &x49);
+ { uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+ { uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+ { uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0xffffffff, &x58);
+ { uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);
+ { uint32_t x63 = cmovznz(x62, x58, x40);
+ { uint32_t x64 = cmovznz(x62, x55, x37);
+ { uint32_t x65 = cmovznz(x62, x52, x34);
+ { uint32_t x66 = cmovznz(x62, x49, x31);
+ { uint32_t x67 = cmovznz(x62, x46, x28);
+ { uint32_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e192m2e64m1/femul.c b/src/Specific/montgomery32_2e192m2e64m1/femul.c
index 11d0723f4..ba6c4f3de 100644
--- a/src/Specific/montgomery32_2e192m2e64m1/femul.c
+++ b/src/Specific/montgomery32_2e192m2e64m1/femul.c
@@ -1,266 +1,260 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);
-{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
-{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
-{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
-{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
-{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
-{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
-{ uint32_t x62; uint32_t x61 = _mulx_u32(x25, 0xffffffff, &x62);
-{ uint32_t x65; uint32_t x64 = _mulx_u32(x25, 0xffffffff, &x65);
-{ uint32_t x68; uint32_t x67 = _mulx_u32(x25, 0xfffffffe, &x68);
-{ uint32_t x71; uint32_t x70 = _mulx_u32(x25, 0xffffffff, &x71);
-{ uint32_t x74; uint32_t x73 = _mulx_u32(x25, 0xffffffff, &x74);
-{ uint32_t x77; uint32_t x76 = _mulx_u32(x25, 0xffffffff, &x77);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(0x0, x62, x64, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x65, x67, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
-{ uint32_t x94; uint8_t _ = _addcarryx_u32(0x0, x92, x77, &x94);
-{ uint32_t _; uint8_t x98 = _addcarryx_u32(0x0, x25, x61, &_);
-{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x43, x79, &x100);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x46, x82, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x49, x85, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x52, x88, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x55, x91, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x58, x94, &x115);
-{ uint32_t x119; uint32_t x118 = _mulx_u32(x7, x15, &x119);
-{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x17, &x122);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x19, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x21, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x23, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x22, &x134);
-{ uint32_t x136; uint8_t x137 = _addcarryx_u32(0x0, x119, x121, &x136);
-{ uint32_t x139; uint8_t x140 = _addcarryx_u32(x137, x122, x124, &x139);
-{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
-{ uint32_t x151; uint8_t _ = _addcarryx_u32(0x0, x149, x134, &x151);
-{ uint32_t x154; uint8_t x155 = _addcarryx_u32(0x0, x100, x118, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(x155, x103, x136, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
-{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x116, x151, &x172);
-{ uint32_t x176; uint32_t x175 = _mulx_u32(x154, 0xffffffff, &x176);
-{ uint32_t x179; uint32_t x178 = _mulx_u32(x154, 0xffffffff, &x179);
-{ uint32_t x182; uint32_t x181 = _mulx_u32(x154, 0xfffffffe, &x182);
-{ uint32_t x185; uint32_t x184 = _mulx_u32(x154, 0xffffffff, &x185);
-{ uint32_t x188; uint32_t x187 = _mulx_u32(x154, 0xffffffff, &x188);
-{ uint32_t x191; uint32_t x190 = _mulx_u32(x154, 0xffffffff, &x191);
-{ uint32_t x193; uint8_t x194 = _addcarryx_u32(0x0, x176, x178, &x193);
-{ uint32_t x196; uint8_t x197 = _addcarryx_u32(x194, x179, x181, &x196);
-{ uint32_t x199; uint8_t x200 = _addcarryx_u32(x197, x182, x184, &x199);
-{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
-{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
-{ uint32_t x208; uint8_t _ = _addcarryx_u32(0x0, x206, x191, &x208);
-{ uint32_t _; uint8_t x212 = _addcarryx_u32(0x0, x154, x175, &_);
-{ uint32_t x214; uint8_t x215 = _addcarryx_u32(x212, x157, x193, &x214);
-{ uint32_t x217; uint8_t x218 = _addcarryx_u32(x215, x160, x196, &x217);
-{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x163, x199, &x220);
-{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x166, x202, &x223);
-{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x169, x205, &x226);
-{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x172, x208, &x229);
-{ uint8_t x231 = (x230 + x173);
-{ uint32_t x234; uint32_t x233 = _mulx_u32(x9, x15, &x234);
-{ uint32_t x237; uint32_t x236 = _mulx_u32(x9, x17, &x237);
-{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x19, &x240);
-{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x21, &x243);
-{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x23, &x246);
-{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x22, &x249);
-{ uint32_t x251; uint8_t x252 = _addcarryx_u32(0x0, x234, x236, &x251);
-{ uint32_t x254; uint8_t x255 = _addcarryx_u32(x252, x237, x239, &x254);
-{ uint32_t x257; uint8_t x258 = _addcarryx_u32(x255, x240, x242, &x257);
-{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
-{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
-{ uint32_t x266; uint8_t _ = _addcarryx_u32(0x0, x264, x249, &x266);
-{ uint32_t x269; uint8_t x270 = _addcarryx_u32(0x0, x214, x233, &x269);
-{ uint32_t x272; uint8_t x273 = _addcarryx_u32(x270, x217, x251, &x272);
-{ uint32_t x275; uint8_t x276 = _addcarryx_u32(x273, x220, x254, &x275);
-{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
-{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
-{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
-{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x231, x266, &x287);
-{ uint32_t x291; uint32_t x290 = _mulx_u32(x269, 0xffffffff, &x291);
-{ uint32_t x294; uint32_t x293 = _mulx_u32(x269, 0xffffffff, &x294);
-{ uint32_t x297; uint32_t x296 = _mulx_u32(x269, 0xfffffffe, &x297);
-{ uint32_t x300; uint32_t x299 = _mulx_u32(x269, 0xffffffff, &x300);
-{ uint32_t x303; uint32_t x302 = _mulx_u32(x269, 0xffffffff, &x303);
-{ uint32_t x306; uint32_t x305 = _mulx_u32(x269, 0xffffffff, &x306);
-{ uint32_t x308; uint8_t x309 = _addcarryx_u32(0x0, x291, x293, &x308);
-{ uint32_t x311; uint8_t x312 = _addcarryx_u32(x309, x294, x296, &x311);
-{ uint32_t x314; uint8_t x315 = _addcarryx_u32(x312, x297, x299, &x314);
-{ uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x300, x302, &x317);
-{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
-{ uint32_t x323; uint8_t _ = _addcarryx_u32(0x0, x321, x306, &x323);
-{ uint32_t _; uint8_t x327 = _addcarryx_u32(0x0, x269, x290, &_);
-{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x272, x308, &x329);
-{ uint32_t x332; uint8_t x333 = _addcarryx_u32(x330, x275, x311, &x332);
-{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x278, x314, &x335);
-{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x281, x317, &x338);
-{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x284, x320, &x341);
-{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x287, x323, &x344);
-{ uint8_t x346 = (x345 + x288);
-{ uint32_t x349; uint32_t x348 = _mulx_u32(x11, x15, &x349);
-{ uint32_t x352; uint32_t x351 = _mulx_u32(x11, x17, &x352);
-{ uint32_t x355; uint32_t x354 = _mulx_u32(x11, x19, &x355);
-{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x21, &x358);
-{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x23, &x361);
-{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x22, &x364);
-{ uint32_t x366; uint8_t x367 = _addcarryx_u32(0x0, x349, x351, &x366);
-{ uint32_t x369; uint8_t x370 = _addcarryx_u32(x367, x352, x354, &x369);
-{ uint32_t x372; uint8_t x373 = _addcarryx_u32(x370, x355, x357, &x372);
-{ uint32_t x375; uint8_t x376 = _addcarryx_u32(x373, x358, x360, &x375);
-{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
-{ uint32_t x381; uint8_t _ = _addcarryx_u32(0x0, x379, x364, &x381);
-{ uint32_t x384; uint8_t x385 = _addcarryx_u32(0x0, x329, x348, &x384);
-{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x332, x366, &x387);
-{ uint32_t x390; uint8_t x391 = _addcarryx_u32(x388, x335, x369, &x390);
-{ uint32_t x393; uint8_t x394 = _addcarryx_u32(x391, x338, x372, &x393);
-{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
-{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
-{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x346, x381, &x402);
-{ uint32_t x406; uint32_t x405 = _mulx_u32(x384, 0xffffffff, &x406);
-{ uint32_t x409; uint32_t x408 = _mulx_u32(x384, 0xffffffff, &x409);
-{ uint32_t x412; uint32_t x411 = _mulx_u32(x384, 0xfffffffe, &x412);
-{ uint32_t x415; uint32_t x414 = _mulx_u32(x384, 0xffffffff, &x415);
-{ uint32_t x418; uint32_t x417 = _mulx_u32(x384, 0xffffffff, &x418);
-{ uint32_t x421; uint32_t x420 = _mulx_u32(x384, 0xffffffff, &x421);
-{ uint32_t x423; uint8_t x424 = _addcarryx_u32(0x0, x406, x408, &x423);
-{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x409, x411, &x426);
-{ uint32_t x429; uint8_t x430 = _addcarryx_u32(x427, x412, x414, &x429);
-{ uint32_t x432; uint8_t x433 = _addcarryx_u32(x430, x415, x417, &x432);
-{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x418, x420, &x435);
-{ uint32_t x438; uint8_t _ = _addcarryx_u32(0x0, x436, x421, &x438);
-{ uint32_t _; uint8_t x442 = _addcarryx_u32(0x0, x384, x405, &_);
-{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x387, x423, &x444);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x390, x426, &x447);
-{ uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x393, x429, &x450);
-{ uint32_t x453; uint8_t x454 = _addcarryx_u32(x451, x396, x432, &x453);
-{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x399, x435, &x456);
-{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x402, x438, &x459);
-{ uint8_t x461 = (x460 + x403);
-{ uint32_t x464; uint32_t x463 = _mulx_u32(x13, x15, &x464);
-{ uint32_t x467; uint32_t x466 = _mulx_u32(x13, x17, &x467);
-{ uint32_t x470; uint32_t x469 = _mulx_u32(x13, x19, &x470);
-{ uint32_t x473; uint32_t x472 = _mulx_u32(x13, x21, &x473);
-{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x23, &x476);
-{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x22, &x479);
-{ uint32_t x481; uint8_t x482 = _addcarryx_u32(0x0, x464, x466, &x481);
-{ uint32_t x484; uint8_t x485 = _addcarryx_u32(x482, x467, x469, &x484);
-{ uint32_t x487; uint8_t x488 = _addcarryx_u32(x485, x470, x472, &x487);
-{ uint32_t x490; uint8_t x491 = _addcarryx_u32(x488, x473, x475, &x490);
-{ uint32_t x493; uint8_t x494 = _addcarryx_u32(x491, x476, x478, &x493);
-{ uint32_t x496; uint8_t _ = _addcarryx_u32(0x0, x494, x479, &x496);
-{ uint32_t x499; uint8_t x500 = _addcarryx_u32(0x0, x444, x463, &x499);
-{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x447, x481, &x502);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x450, x484, &x505);
-{ uint32_t x508; uint8_t x509 = _addcarryx_u32(x506, x453, x487, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(x509, x456, x490, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x461, x496, &x517);
-{ uint32_t x521; uint32_t x520 = _mulx_u32(x499, 0xffffffff, &x521);
-{ uint32_t x524; uint32_t x523 = _mulx_u32(x499, 0xffffffff, &x524);
-{ uint32_t x527; uint32_t x526 = _mulx_u32(x499, 0xfffffffe, &x527);
-{ uint32_t x530; uint32_t x529 = _mulx_u32(x499, 0xffffffff, &x530);
-{ uint32_t x533; uint32_t x532 = _mulx_u32(x499, 0xffffffff, &x533);
-{ uint32_t x536; uint32_t x535 = _mulx_u32(x499, 0xffffffff, &x536);
-{ uint32_t x538; uint8_t x539 = _addcarryx_u32(0x0, x521, x523, &x538);
-{ uint32_t x541; uint8_t x542 = _addcarryx_u32(x539, x524, x526, &x541);
-{ uint32_t x544; uint8_t x545 = _addcarryx_u32(x542, x527, x529, &x544);
-{ uint32_t x547; uint8_t x548 = _addcarryx_u32(x545, x530, x532, &x547);
-{ uint32_t x550; uint8_t x551 = _addcarryx_u32(x548, x533, x535, &x550);
-{ uint32_t x553; uint8_t _ = _addcarryx_u32(0x0, x551, x536, &x553);
-{ uint32_t _; uint8_t x557 = _addcarryx_u32(0x0, x499, x520, &_);
-{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x502, x538, &x559);
-{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x505, x541, &x562);
-{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x508, x544, &x565);
-{ uint32_t x568; uint8_t x569 = _addcarryx_u32(x566, x511, x547, &x568);
-{ uint32_t x571; uint8_t x572 = _addcarryx_u32(x569, x514, x550, &x571);
-{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x517, x553, &x574);
-{ uint8_t x576 = (x575 + x518);
-{ uint32_t x579; uint32_t x578 = _mulx_u32(x12, x15, &x579);
-{ uint32_t x582; uint32_t x581 = _mulx_u32(x12, x17, &x582);
-{ uint32_t x585; uint32_t x584 = _mulx_u32(x12, x19, &x585);
-{ uint32_t x588; uint32_t x587 = _mulx_u32(x12, x21, &x588);
-{ uint32_t x591; uint32_t x590 = _mulx_u32(x12, x23, &x591);
-{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x22, &x594);
-{ uint32_t x596; uint8_t x597 = _addcarryx_u32(0x0, x579, x581, &x596);
-{ uint32_t x599; uint8_t x600 = _addcarryx_u32(x597, x582, x584, &x599);
-{ uint32_t x602; uint8_t x603 = _addcarryx_u32(x600, x585, x587, &x602);
-{ uint32_t x605; uint8_t x606 = _addcarryx_u32(x603, x588, x590, &x605);
-{ uint32_t x608; uint8_t x609 = _addcarryx_u32(x606, x591, x593, &x608);
-{ uint32_t x611; uint8_t _ = _addcarryx_u32(0x0, x609, x594, &x611);
-{ uint32_t x614; uint8_t x615 = _addcarryx_u32(0x0, x559, x578, &x614);
-{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x562, x596, &x617);
-{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x565, x599, &x620);
-{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x568, x602, &x623);
-{ uint32_t x626; uint8_t x627 = _addcarryx_u32(x624, x571, x605, &x626);
-{ uint32_t x629; uint8_t x630 = _addcarryx_u32(x627, x574, x608, &x629);
-{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x576, x611, &x632);
-{ uint32_t x636; uint32_t x635 = _mulx_u32(x614, 0xffffffff, &x636);
-{ uint32_t x639; uint32_t x638 = _mulx_u32(x614, 0xffffffff, &x639);
-{ uint32_t x642; uint32_t x641 = _mulx_u32(x614, 0xfffffffe, &x642);
-{ uint32_t x645; uint32_t x644 = _mulx_u32(x614, 0xffffffff, &x645);
-{ uint32_t x648; uint32_t x647 = _mulx_u32(x614, 0xffffffff, &x648);
-{ uint32_t x651; uint32_t x650 = _mulx_u32(x614, 0xffffffff, &x651);
-{ uint32_t x653; uint8_t x654 = _addcarryx_u32(0x0, x636, x638, &x653);
-{ uint32_t x656; uint8_t x657 = _addcarryx_u32(x654, x639, x641, &x656);
-{ uint32_t x659; uint8_t x660 = _addcarryx_u32(x657, x642, x644, &x659);
-{ uint32_t x662; uint8_t x663 = _addcarryx_u32(x660, x645, x647, &x662);
-{ uint32_t x665; uint8_t x666 = _addcarryx_u32(x663, x648, x650, &x665);
-{ uint32_t x668; uint8_t _ = _addcarryx_u32(0x0, x666, x651, &x668);
-{ uint32_t _; uint8_t x672 = _addcarryx_u32(0x0, x614, x635, &_);
-{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x617, x653, &x674);
-{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x620, x656, &x677);
-{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x623, x659, &x680);
-{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x626, x662, &x683);
-{ uint32_t x686; uint8_t x687 = _addcarryx_u32(x684, x629, x665, &x686);
-{ uint32_t x689; uint8_t x690 = _addcarryx_u32(x687, x632, x668, &x689);
-{ uint8_t x691 = (x690 + x633);
-{ uint32_t x693; uint8_t x694 = _subborrow_u32(0x0, x674, 0xffffffff, &x693);
-{ uint32_t x696; uint8_t x697 = _subborrow_u32(x694, x677, 0xffffffff, &x696);
-{ uint32_t x699; uint8_t x700 = _subborrow_u32(x697, x680, 0xfffffffe, &x699);
-{ uint32_t x702; uint8_t x703 = _subborrow_u32(x700, x683, 0xffffffff, &x702);
-{ uint32_t x705; uint8_t x706 = _subborrow_u32(x703, x686, 0xffffffff, &x705);
-{ uint32_t x708; uint8_t x709 = _subborrow_u32(x706, x689, 0xffffffff, &x708);
-{ uint32_t _; uint8_t x712 = _subborrow_u32(x709, x691, 0x0, &_);
-{ uint32_t x713 = cmovznz(x712, x708, x689);
-{ uint32_t x714 = cmovznz(x712, x705, x686);
-{ uint32_t x715 = cmovznz(x712, x702, x683);
-{ uint32_t x716 = cmovznz(x712, x699, x680);
-{ uint32_t x717 = cmovznz(x712, x696, x677);
-{ uint32_t x718 = cmovznz(x712, x693, x674);
-out[0] = x713;
-out[1] = x714;
-out[2] = x715;
-out[3] = x716;
-out[4] = x717;
-out[5] = x718;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { // six-word multiply-and-reduce of in1 by in2: one row per word of in1, then a final conditional subtraction
+ { const uint32_t x12 = in1[5]; // load the six words of in1 (x5 = in1[0] ... x12 = in1[5])
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; // load the six words of in2 (x15 = in2[0] ... x22 = in2[5])
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // row 0: products of in1[0] with every word of in2 (each call yields two words: the return value and the out-parameter; presumably low and high halves - confirm against _mulx_u32)
+ { uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+ { uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+ { uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+ { uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+ { uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // chain the row's product words together with carry propagation
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+ { uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); // carry x56 is added into the top word; the resulting carry flag is discarded
+ { uint32_t x62; uint32_t x61 = _mulx_u32(x25, 0xffffffff, &x62); // products of x25 with the constant words ffffffff, ffffffff, fffffffe, ffffffff, ffffffff, ffffffff
+ { uint32_t x65; uint32_t x64 = _mulx_u32(x25, 0xffffffff, &x65);
+ { uint32_t x68; uint32_t x67 = _mulx_u32(x25, 0xfffffffe, &x68);
+ { uint32_t x71; uint32_t x70 = _mulx_u32(x25, 0xffffffff, &x71);
+ { uint32_t x74; uint32_t x73 = _mulx_u32(x25, 0xffffffff, &x74);
+ { uint32_t x77; uint32_t x76 = _mulx_u32(x25, 0xffffffff, &x77);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(0x0, x62, x64, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x65, x67, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+ { uint32_t x94; uint8_t _ = _addcarryx_u32(0x0, x92, x77, &x94);
+ { uint32_t _; uint8_t x98 = _addcarryx_u32(0x0, x25, x61, &_); // add the two rows; the lowest sum word is discarded and only its carry x98 continues
+ { uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x43, x79, &x100);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x46, x82, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x49, x85, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x52, x88, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x55, x91, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x58, x94, &x115);
+ { uint32_t x119; uint32_t x118 = _mulx_u32(x7, x15, &x119); // row 1: products of in1[1] with every word of in2
+ { uint32_t x122; uint32_t x121 = _mulx_u32(x7, x17, &x122);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x19, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x21, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x23, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x22, &x134);
+ { uint32_t x136; uint8_t x137 = _addcarryx_u32(0x0, x119, x121, &x136);
+ { uint32_t x139; uint8_t x140 = _addcarryx_u32(x137, x122, x124, &x139);
+ { uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+ { uint32_t x151; uint8_t _ = _addcarryx_u32(0x0, x149, x134, &x151);
+ { uint32_t x154; uint8_t x155 = _addcarryx_u32(0x0, x100, x118, &x154); // accumulate row 1 onto the running value x100..x115 (top carry x116 enters at the last step)
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(x155, x103, x136, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+ { uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x116, x151, &x172);
+ { uint32_t x176; uint32_t x175 = _mulx_u32(x154, 0xffffffff, &x176); // products of the new lowest word x154 with the same constant words
+ { uint32_t x179; uint32_t x178 = _mulx_u32(x154, 0xffffffff, &x179);
+ { uint32_t x182; uint32_t x181 = _mulx_u32(x154, 0xfffffffe, &x182);
+ { uint32_t x185; uint32_t x184 = _mulx_u32(x154, 0xffffffff, &x185);
+ { uint32_t x188; uint32_t x187 = _mulx_u32(x154, 0xffffffff, &x188);
+ { uint32_t x191; uint32_t x190 = _mulx_u32(x154, 0xffffffff, &x191);
+ { uint32_t x193; uint8_t x194 = _addcarryx_u32(0x0, x176, x178, &x193);
+ { uint32_t x196; uint8_t x197 = _addcarryx_u32(x194, x179, x181, &x196);
+ { uint32_t x199; uint8_t x200 = _addcarryx_u32(x197, x182, x184, &x199);
+ { uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+ { uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+ { uint32_t x208; uint8_t _ = _addcarryx_u32(0x0, x206, x191, &x208);
+ { uint32_t _; uint8_t x212 = _addcarryx_u32(0x0, x154, x175, &_); // lowest sum word discarded again, carry x212 kept
+ { uint32_t x214; uint8_t x215 = _addcarryx_u32(x212, x157, x193, &x214);
+ { uint32_t x217; uint8_t x218 = _addcarryx_u32(x215, x160, x196, &x217);
+ { uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x163, x199, &x220);
+ { uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x166, x202, &x223);
+ { uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x169, x205, &x226);
+ { uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x172, x208, &x229);
+ { uint8_t x231 = (x230 + x173); // combine the two top carries into one value that feeds the next row
+ { uint32_t x234; uint32_t x233 = _mulx_u32(x9, x15, &x234); // row 2: products of in1[2] with every word of in2
+ { uint32_t x237; uint32_t x236 = _mulx_u32(x9, x17, &x237);
+ { uint32_t x240; uint32_t x239 = _mulx_u32(x9, x19, &x240);
+ { uint32_t x243; uint32_t x242 = _mulx_u32(x9, x21, &x243);
+ { uint32_t x246; uint32_t x245 = _mulx_u32(x9, x23, &x246);
+ { uint32_t x249; uint32_t x248 = _mulx_u32(x9, x22, &x249);
+ { uint32_t x251; uint8_t x252 = _addcarryx_u32(0x0, x234, x236, &x251);
+ { uint32_t x254; uint8_t x255 = _addcarryx_u32(x252, x237, x239, &x254);
+ { uint32_t x257; uint8_t x258 = _addcarryx_u32(x255, x240, x242, &x257);
+ { uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+ { uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+ { uint32_t x266; uint8_t _ = _addcarryx_u32(0x0, x264, x249, &x266);
+ { uint32_t x269; uint8_t x270 = _addcarryx_u32(0x0, x214, x233, &x269);
+ { uint32_t x272; uint8_t x273 = _addcarryx_u32(x270, x217, x251, &x272);
+ { uint32_t x275; uint8_t x276 = _addcarryx_u32(x273, x220, x254, &x275);
+ { uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+ { uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+ { uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+ { uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x231, x266, &x287);
+ { uint32_t x291; uint32_t x290 = _mulx_u32(x269, 0xffffffff, &x291);
+ { uint32_t x294; uint32_t x293 = _mulx_u32(x269, 0xffffffff, &x294);
+ { uint32_t x297; uint32_t x296 = _mulx_u32(x269, 0xfffffffe, &x297);
+ { uint32_t x300; uint32_t x299 = _mulx_u32(x269, 0xffffffff, &x300);
+ { uint32_t x303; uint32_t x302 = _mulx_u32(x269, 0xffffffff, &x303);
+ { uint32_t x306; uint32_t x305 = _mulx_u32(x269, 0xffffffff, &x306);
+ { uint32_t x308; uint8_t x309 = _addcarryx_u32(0x0, x291, x293, &x308);
+ { uint32_t x311; uint8_t x312 = _addcarryx_u32(x309, x294, x296, &x311);
+ { uint32_t x314; uint8_t x315 = _addcarryx_u32(x312, x297, x299, &x314);
+ { uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x300, x302, &x317);
+ { uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+ { uint32_t x323; uint8_t _ = _addcarryx_u32(0x0, x321, x306, &x323);
+ { uint32_t _; uint8_t x327 = _addcarryx_u32(0x0, x269, x290, &_);
+ { uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x272, x308, &x329);
+ { uint32_t x332; uint8_t x333 = _addcarryx_u32(x330, x275, x311, &x332);
+ { uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x278, x314, &x335);
+ { uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x281, x317, &x338);
+ { uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x284, x320, &x341);
+ { uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x287, x323, &x344);
+ { uint8_t x346 = (x345 + x288);
+ { uint32_t x349; uint32_t x348 = _mulx_u32(x11, x15, &x349); // row 3: products of in1[3] with every word of in2
+ { uint32_t x352; uint32_t x351 = _mulx_u32(x11, x17, &x352);
+ { uint32_t x355; uint32_t x354 = _mulx_u32(x11, x19, &x355);
+ { uint32_t x358; uint32_t x357 = _mulx_u32(x11, x21, &x358);
+ { uint32_t x361; uint32_t x360 = _mulx_u32(x11, x23, &x361);
+ { uint32_t x364; uint32_t x363 = _mulx_u32(x11, x22, &x364);
+ { uint32_t x366; uint8_t x367 = _addcarryx_u32(0x0, x349, x351, &x366);
+ { uint32_t x369; uint8_t x370 = _addcarryx_u32(x367, x352, x354, &x369);
+ { uint32_t x372; uint8_t x373 = _addcarryx_u32(x370, x355, x357, &x372);
+ { uint32_t x375; uint8_t x376 = _addcarryx_u32(x373, x358, x360, &x375);
+ { uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+ { uint32_t x381; uint8_t _ = _addcarryx_u32(0x0, x379, x364, &x381);
+ { uint32_t x384; uint8_t x385 = _addcarryx_u32(0x0, x329, x348, &x384);
+ { uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x332, x366, &x387);
+ { uint32_t x390; uint8_t x391 = _addcarryx_u32(x388, x335, x369, &x390);
+ { uint32_t x393; uint8_t x394 = _addcarryx_u32(x391, x338, x372, &x393);
+ { uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+ { uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+ { uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x346, x381, &x402);
+ { uint32_t x406; uint32_t x405 = _mulx_u32(x384, 0xffffffff, &x406);
+ { uint32_t x409; uint32_t x408 = _mulx_u32(x384, 0xffffffff, &x409);
+ { uint32_t x412; uint32_t x411 = _mulx_u32(x384, 0xfffffffe, &x412);
+ { uint32_t x415; uint32_t x414 = _mulx_u32(x384, 0xffffffff, &x415);
+ { uint32_t x418; uint32_t x417 = _mulx_u32(x384, 0xffffffff, &x418);
+ { uint32_t x421; uint32_t x420 = _mulx_u32(x384, 0xffffffff, &x421);
+ { uint32_t x423; uint8_t x424 = _addcarryx_u32(0x0, x406, x408, &x423);
+ { uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x409, x411, &x426);
+ { uint32_t x429; uint8_t x430 = _addcarryx_u32(x427, x412, x414, &x429);
+ { uint32_t x432; uint8_t x433 = _addcarryx_u32(x430, x415, x417, &x432);
+ { uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x418, x420, &x435);
+ { uint32_t x438; uint8_t _ = _addcarryx_u32(0x0, x436, x421, &x438);
+ { uint32_t _; uint8_t x442 = _addcarryx_u32(0x0, x384, x405, &_);
+ { uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x387, x423, &x444);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x390, x426, &x447);
+ { uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x393, x429, &x450);
+ { uint32_t x453; uint8_t x454 = _addcarryx_u32(x451, x396, x432, &x453);
+ { uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x399, x435, &x456);
+ { uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x402, x438, &x459);
+ { uint8_t x461 = (x460 + x403);
+ { uint32_t x464; uint32_t x463 = _mulx_u32(x13, x15, &x464); // row 4: products of in1[4] with every word of in2
+ { uint32_t x467; uint32_t x466 = _mulx_u32(x13, x17, &x467);
+ { uint32_t x470; uint32_t x469 = _mulx_u32(x13, x19, &x470);
+ { uint32_t x473; uint32_t x472 = _mulx_u32(x13, x21, &x473);
+ { uint32_t x476; uint32_t x475 = _mulx_u32(x13, x23, &x476);
+ { uint32_t x479; uint32_t x478 = _mulx_u32(x13, x22, &x479);
+ { uint32_t x481; uint8_t x482 = _addcarryx_u32(0x0, x464, x466, &x481);
+ { uint32_t x484; uint8_t x485 = _addcarryx_u32(x482, x467, x469, &x484);
+ { uint32_t x487; uint8_t x488 = _addcarryx_u32(x485, x470, x472, &x487);
+ { uint32_t x490; uint8_t x491 = _addcarryx_u32(x488, x473, x475, &x490);
+ { uint32_t x493; uint8_t x494 = _addcarryx_u32(x491, x476, x478, &x493);
+ { uint32_t x496; uint8_t _ = _addcarryx_u32(0x0, x494, x479, &x496);
+ { uint32_t x499; uint8_t x500 = _addcarryx_u32(0x0, x444, x463, &x499);
+ { uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x447, x481, &x502);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x450, x484, &x505);
+ { uint32_t x508; uint8_t x509 = _addcarryx_u32(x506, x453, x487, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(x509, x456, x490, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x461, x496, &x517);
+ { uint32_t x521; uint32_t x520 = _mulx_u32(x499, 0xffffffff, &x521);
+ { uint32_t x524; uint32_t x523 = _mulx_u32(x499, 0xffffffff, &x524);
+ { uint32_t x527; uint32_t x526 = _mulx_u32(x499, 0xfffffffe, &x527);
+ { uint32_t x530; uint32_t x529 = _mulx_u32(x499, 0xffffffff, &x530);
+ { uint32_t x533; uint32_t x532 = _mulx_u32(x499, 0xffffffff, &x533);
+ { uint32_t x536; uint32_t x535 = _mulx_u32(x499, 0xffffffff, &x536);
+ { uint32_t x538; uint8_t x539 = _addcarryx_u32(0x0, x521, x523, &x538);
+ { uint32_t x541; uint8_t x542 = _addcarryx_u32(x539, x524, x526, &x541);
+ { uint32_t x544; uint8_t x545 = _addcarryx_u32(x542, x527, x529, &x544);
+ { uint32_t x547; uint8_t x548 = _addcarryx_u32(x545, x530, x532, &x547);
+ { uint32_t x550; uint8_t x551 = _addcarryx_u32(x548, x533, x535, &x550);
+ { uint32_t x553; uint8_t _ = _addcarryx_u32(0x0, x551, x536, &x553);
+ { uint32_t _; uint8_t x557 = _addcarryx_u32(0x0, x499, x520, &_);
+ { uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x502, x538, &x559);
+ { uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x505, x541, &x562);
+ { uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x508, x544, &x565);
+ { uint32_t x568; uint8_t x569 = _addcarryx_u32(x566, x511, x547, &x568);
+ { uint32_t x571; uint8_t x572 = _addcarryx_u32(x569, x514, x550, &x571);
+ { uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x517, x553, &x574);
+ { uint8_t x576 = (x575 + x518);
+ { uint32_t x579; uint32_t x578 = _mulx_u32(x12, x15, &x579); // row 5: products of in1[5] with every word of in2
+ { uint32_t x582; uint32_t x581 = _mulx_u32(x12, x17, &x582);
+ { uint32_t x585; uint32_t x584 = _mulx_u32(x12, x19, &x585);
+ { uint32_t x588; uint32_t x587 = _mulx_u32(x12, x21, &x588);
+ { uint32_t x591; uint32_t x590 = _mulx_u32(x12, x23, &x591);
+ { uint32_t x594; uint32_t x593 = _mulx_u32(x12, x22, &x594);
+ { uint32_t x596; uint8_t x597 = _addcarryx_u32(0x0, x579, x581, &x596);
+ { uint32_t x599; uint8_t x600 = _addcarryx_u32(x597, x582, x584, &x599);
+ { uint32_t x602; uint8_t x603 = _addcarryx_u32(x600, x585, x587, &x602);
+ { uint32_t x605; uint8_t x606 = _addcarryx_u32(x603, x588, x590, &x605);
+ { uint32_t x608; uint8_t x609 = _addcarryx_u32(x606, x591, x593, &x608);
+ { uint32_t x611; uint8_t _ = _addcarryx_u32(0x0, x609, x594, &x611);
+ { uint32_t x614; uint8_t x615 = _addcarryx_u32(0x0, x559, x578, &x614);
+ { uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x562, x596, &x617);
+ { uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x565, x599, &x620);
+ { uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x568, x602, &x623);
+ { uint32_t x626; uint8_t x627 = _addcarryx_u32(x624, x571, x605, &x626);
+ { uint32_t x629; uint8_t x630 = _addcarryx_u32(x627, x574, x608, &x629);
+ { uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x576, x611, &x632);
+ { uint32_t x636; uint32_t x635 = _mulx_u32(x614, 0xffffffff, &x636);
+ { uint32_t x639; uint32_t x638 = _mulx_u32(x614, 0xffffffff, &x639);
+ { uint32_t x642; uint32_t x641 = _mulx_u32(x614, 0xfffffffe, &x642);
+ { uint32_t x645; uint32_t x644 = _mulx_u32(x614, 0xffffffff, &x645);
+ { uint32_t x648; uint32_t x647 = _mulx_u32(x614, 0xffffffff, &x648);
+ { uint32_t x651; uint32_t x650 = _mulx_u32(x614, 0xffffffff, &x651);
+ { uint32_t x653; uint8_t x654 = _addcarryx_u32(0x0, x636, x638, &x653);
+ { uint32_t x656; uint8_t x657 = _addcarryx_u32(x654, x639, x641, &x656);
+ { uint32_t x659; uint8_t x660 = _addcarryx_u32(x657, x642, x644, &x659);
+ { uint32_t x662; uint8_t x663 = _addcarryx_u32(x660, x645, x647, &x662);
+ { uint32_t x665; uint8_t x666 = _addcarryx_u32(x663, x648, x650, &x665);
+ { uint32_t x668; uint8_t _ = _addcarryx_u32(0x0, x666, x651, &x668);
+ { uint32_t _; uint8_t x672 = _addcarryx_u32(0x0, x614, x635, &_);
+ { uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x617, x653, &x674);
+ { uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x620, x656, &x677);
+ { uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x623, x659, &x680);
+ { uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x626, x662, &x683);
+ { uint32_t x686; uint8_t x687 = _addcarryx_u32(x684, x629, x665, &x686);
+ { uint32_t x689; uint8_t x690 = _addcarryx_u32(x687, x632, x668, &x689);
+ { uint8_t x691 = (x690 + x633);
+ { uint32_t x693; uint8_t x694 = _subborrow_u32(0x0, x674, 0xffffffff, &x693); // trial subtraction of the same constant words from x674..x689, borrow chained on through x691
+ { uint32_t x696; uint8_t x697 = _subborrow_u32(x694, x677, 0xffffffff, &x696);
+ { uint32_t x699; uint8_t x700 = _subborrow_u32(x697, x680, 0xfffffffe, &x699);
+ { uint32_t x702; uint8_t x703 = _subborrow_u32(x700, x683, 0xffffffff, &x702);
+ { uint32_t x705; uint8_t x706 = _subborrow_u32(x703, x686, 0xffffffff, &x705);
+ { uint32_t x708; uint8_t x709 = _subborrow_u32(x706, x689, 0xffffffff, &x708);
+ { uint32_t _; uint8_t x712 = _subborrow_u32(x709, x691, 0x0, &_);
+ { uint32_t x713 = cmovznz(x712, x708, x689); // per-word choice between the subtracted and unsubtracted word, keyed on final borrow x712 (cmovznz is defined elsewhere; NOTE(review): confirm its argument order)
+ { uint32_t x714 = cmovznz(x712, x705, x686);
+ { uint32_t x715 = cmovznz(x712, x702, x683);
+ { uint32_t x716 = cmovznz(x712, x699, x680);
+ { uint32_t x717 = cmovznz(x712, x696, x677);
+ { uint32_t x718 = cmovznz(x712, x693, x674);
+ out[0] = x718; // results are stored starting with the word from the first step of the subtraction chain
+ out[1] = x717;
+ out[2] = x716;
+ out[3] = x715;
+ out[4] = x714;
+ out[5] = x713;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e192m2e64m1/fenz.c b/src/Specific/montgomery32_2e192m2e64m1/fenz.c
index 6d8132b20..2e0454af1 100644
--- a/src/Specific/montgomery32_2e192m2e64m1/fenz.c
+++ b/src/Specific/montgomery32_2e192m2e64m1/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x11 = (x10 | x9);
-{ uint32_t x12 = (x8 | x11);
-{ uint32_t x13 = (x6 | x12);
-{ uint32_t x14 = (x4 | x13);
-{ uint32_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(uint32_t out[1], const uint32_t in1[6]) { // out[0] = bitwise OR of the six words of in1, so it is zero exactly when every word is zero
+ { const uint32_t x9 = in1[5]; // load the six input words
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x11 = (x10 | x9); // fold the words together with OR, highest index first
+ { uint32_t x12 = (x8 | x11);
+ { uint32_t x13 = (x6 | x12);
+ { uint32_t x14 = (x4 | x13);
+ { uint32_t x15 = (x2 | x14);
+ out[0] = x15; // single-word result; no branches on the input
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e192m2e64m1/feopp.c b/src/Specific/montgomery32_2e192m2e64m1/feopp.c
index c55b28253..a0f780530 100644
--- a/src/Specific/montgomery32_2e192m2e64m1/feopp.c
+++ b/src/Specific/montgomery32_2e192m2e64m1/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);
-{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
-{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
-{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
-{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
-{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
-{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
-{ uint32_t x30 = (x29 & 0xffffffff);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
-{ uint32_t x34 = (x29 & 0xffffffff);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
-{ uint32_t x38 = (x29 & 0xfffffffe);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
-{ uint32_t x42 = (x29 & 0xffffffff);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
-{ uint32_t x46 = (x29 & 0xffffffff);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
-{ uint32_t x50 = (x29 & 0xffffffff);
-{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint32_t out[6], const uint32_t in1[6]) { // word-wise 0 - in1 with borrow, followed by a masked add of the constant words ffffffff, ffffffff, fffffffe, ffffffff, ffffffff, ffffffff
+ { const uint32_t x9 = in1[5]; // load the six input words (x2 = in1[0] ... x9 = in1[5])
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // borrow chain computing 0 - in1[i], starting at in1[0]
+ { uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+ { uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+ { uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+ { uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+ { uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // x29 is chosen between 0x0 and 0xffffffff by the final borrow x28 (presumably all-ones when a borrow occurred - confirm against cmovznz)
+ { uint32_t x30 = (x29 & 0xffffffff); // mask each constant word with x29, then add it back with carry
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+ { uint32_t x34 = (x29 & 0xffffffff);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+ { uint32_t x38 = (x29 & 0xfffffffe);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+ { uint32_t x42 = (x29 & 0xffffffff);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+ { uint32_t x46 = (x29 & 0xffffffff);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+ { uint32_t x50 = (x29 & 0xffffffff);
+ { uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // the last carry-out is discarded
+ out[0] = x32; // out[i] holds the word computed from in1[i]
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e192m2e64m1/fesub.c b/src/Specific/montgomery32_2e192m2e64m1/fesub.c
index 252303502..ea5bdee8f 100644
--- a/src/Specific/montgomery32_2e192m2e64m1/fesub.c
+++ b/src/Specific/montgomery32_2e192m2e64m1/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);
-{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
-{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
-{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
-{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
-{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
-{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);
-{ uint32_t x43 = (x42 & 0xffffffff);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
-{ uint32_t x47 = (x42 & 0xffffffff);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
-{ uint32_t x51 = (x42 & 0xfffffffe);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
-{ uint32_t x55 = (x42 & 0xffffffff);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
-{ uint32_t x59 = (x42 & 0xffffffff);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
-{ uint32_t x63 = (x42 & 0xffffffff);
-{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { // word-wise in1 - in2 with borrow, followed by a masked add of the constant words ffffffff, ffffffff, fffffffe, ffffffff, ffffffff, ffffffff
+ { const uint32_t x12 = in1[5]; // load the six words of in1 (x5 = in1[0] ... x12 = in1[5])
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5]; // load the six words of in2 (x15 = in2[0] ... x22 = in2[5])
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // borrow chain computing in1[i] - in2[i], starting at index 0
+ { uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+ { uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+ { uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+ { uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+ { uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+ { uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // x42 is chosen between 0x0 and 0xffffffff by the final borrow x41 (presumably all-ones when a borrow occurred - confirm against cmovznz)
+ { uint32_t x43 = (x42 & 0xffffffff); // mask each constant word with x42, then add it back with carry
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+ { uint32_t x47 = (x42 & 0xffffffff);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+ { uint32_t x51 = (x42 & 0xfffffffe);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+ { uint32_t x55 = (x42 & 0xffffffff);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+ { uint32_t x59 = (x42 & 0xffffffff);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+ { uint32_t x63 = (x42 & 0xffffffff);
+ { uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // the last carry-out is discarded
+ out[0] = x45; // out[i] holds the word computed from in1[i] and in2[i]
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e194m33/feadd.c b/src/Specific/montgomery32_2e194m33/feadd.c
index 552319502..28779018c 100644
--- a/src/Specific/montgomery32_2e194m33/feadd.c
+++ b/src/Specific/montgomery32_2e194m33/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffdf, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffdf, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e194m33/fenz.c b/src/Specific/montgomery32_2e194m33/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e194m33/fenz.c
+++ b/src/Specific/montgomery32_2e194m33/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e196m15/feadd.c b/src/Specific/montgomery32_2e196m15/feadd.c
index 9814fd24f..eaaaae916 100644
--- a/src/Specific/montgomery32_2e196m15/feadd.c
+++ b/src/Specific/montgomery32_2e196m15/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffff1, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xf, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffff1, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xf, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e196m15/fenz.c b/src/Specific/montgomery32_2e196m15/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e196m15/fenz.c
+++ b/src/Specific/montgomery32_2e196m15/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e198m17/feadd.c b/src/Specific/montgomery32_2e198m17/feadd.c
index 95d09c821..d2b70ab1b 100644
--- a/src/Specific/montgomery32_2e198m17/feadd.c
+++ b/src/Specific/montgomery32_2e198m17/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffef, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3f, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffef, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3f, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e198m17/fenz.c b/src/Specific/montgomery32_2e198m17/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e198m17/fenz.c
+++ b/src/Specific/montgomery32_2e198m17/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c b/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c
index 8233bfac7..bee7a9ecd 100644
--- a/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c
+++ b/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffff, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x14bf, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffff, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x14bf, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c b/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c
+++ b/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e206m5/feadd.c b/src/Specific/montgomery32_2e206m5/feadd.c
index d2dbcd052..108156828 100644
--- a/src/Specific/montgomery32_2e206m5/feadd.c
+++ b/src/Specific/montgomery32_2e206m5/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffb, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3fff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffb, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3fff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e206m5/fenz.c b/src/Specific/montgomery32_2e206m5/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e206m5/fenz.c
+++ b/src/Specific/montgomery32_2e206m5/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e212m29/feadd.c b/src/Specific/montgomery32_2e212m29/feadd.c
index a5d7c69ae..7254aeede 100644
--- a/src/Specific/montgomery32_2e212m29/feadd.c
+++ b/src/Specific/montgomery32_2e212m29/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffe3, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xfffff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffe3, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xfffff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e212m29/fenz.c b/src/Specific/montgomery32_2e212m29/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e212m29/fenz.c
+++ b/src/Specific/montgomery32_2e212m29/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e213m3/feadd.c b/src/Specific/montgomery32_2e213m3/feadd.c
index ae525568b..d456e7bf6 100644
--- a/src/Specific/montgomery32_2e213m3/feadd.c
+++ b/src/Specific/montgomery32_2e213m3/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffd, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x1fffff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffd, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x1fffff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e213m3/fenz.c b/src/Specific/montgomery32_2e213m3/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e213m3/fenz.c
+++ b/src/Specific/montgomery32_2e213m3/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e216m2e108m1/feadd.c b/src/Specific/montgomery32_2e216m2e108m1/feadd.c
index b0e96bfa8..8e9dce267 100644
--- a/src/Specific/montgomery32_2e216m2e108m1/feadd.c
+++ b/src/Specific/montgomery32_2e216m2e108m1/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffff, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffefff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xffffff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffff, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffefff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xffffff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e216m2e108m1/fenz.c b/src/Specific/montgomery32_2e216m2e108m1/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e216m2e108m1/fenz.c
+++ b/src/Specific/montgomery32_2e216m2e108m1/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e221m3/feadd.c b/src/Specific/montgomery32_2e221m3/feadd.c
index d43caf856..db31a1b9e 100644
--- a/src/Specific/montgomery32_2e221m3/feadd.c
+++ b/src/Specific/montgomery32_2e221m3/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffd, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x1fffffff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffd, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x1fffffff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e221m3/fenz.c b/src/Specific/montgomery32_2e221m3/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e221m3/fenz.c
+++ b/src/Specific/montgomery32_2e221m3/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e222m117/feadd.c b/src/Specific/montgomery32_2e222m117/feadd.c
index ef4baa59b..8c8943b7b 100644
--- a/src/Specific/montgomery32_2e222m117/feadd.c
+++ b/src/Specific/montgomery32_2e222m117/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffff8b, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3fffffff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffff8b, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3fffffff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e222m117/fenz.c b/src/Specific/montgomery32_2e222m117/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e222m117/fenz.c
+++ b/src/Specific/montgomery32_2e222m117/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x13 = (x12 | x11);
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e224m2e96p1/feadd.c b/src/Specific/montgomery32_2e224m2e96p1/feadd.c
index 8f088ce7a..dfe41b61b 100644
--- a/src/Specific/montgomery32_2e224m2e96p1/feadd.c
+++ b/src/Specific/montgomery32_2e224m2e96p1/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
-{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
-{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
-{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
-{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0x1, &x50);
-{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0x0, &x53);
-{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0x0, &x56);
-{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
-{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
-{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
-{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xffffffff, &x68);
-{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
-{ uint32_t x73 = cmovznz(x72, x68, x47);
-{ uint32_t x74 = cmovznz(x72, x65, x44);
-{ uint32_t x75 = cmovznz(x72, x62, x41);
-{ uint32_t x76 = cmovznz(x72, x59, x38);
-{ uint32_t x77 = cmovznz(x72, x56, x35);
-{ uint32_t x78 = cmovznz(x72, x53, x32);
-{ uint32_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29);
+ { uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+ { uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+ { uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+ { uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0x1, &x50);
+ { uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0x0, &x53);
+ { uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0x0, &x56);
+ { uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+ { uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+ { uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+ { uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xffffffff, &x68);
+ { uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_);
+ { uint32_t x73 = cmovznz(x72, x68, x47);
+ { uint32_t x74 = cmovznz(x72, x65, x44);
+ { uint32_t x75 = cmovznz(x72, x62, x41);
+ { uint32_t x76 = cmovznz(x72, x59, x38);
+ { uint32_t x77 = cmovznz(x72, x56, x35);
+ { uint32_t x78 = cmovznz(x72, x53, x32);
+ { uint32_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e224m2e96p1/femul.c b/src/Specific/montgomery32_2e224m2e96p1/femul.c
index 185a40c24..a08b0ab8c 100644
--- a/src/Specific/montgomery32_2e224m2e96p1/femul.c
+++ b/src/Specific/montgomery32_2e224m2e96p1/femul.c
@@ -1,328 +1,324 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint32_t x30; uint32_t x29 = _mulx_u32(x5, x17, &x30);
-{ uint32_t x33; uint32_t x32 = _mulx_u32(x5, x19, &x33);
-{ uint32_t x36; uint32_t x35 = _mulx_u32(x5, x21, &x36);
-{ uint32_t x39; uint32_t x38 = _mulx_u32(x5, x23, &x39);
-{ uint32_t x42; uint32_t x41 = _mulx_u32(x5, x25, &x42);
-{ uint32_t x45; uint32_t x44 = _mulx_u32(x5, x27, &x45);
-{ uint32_t x48; uint32_t x47 = _mulx_u32(x5, x26, &x48);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(0x0, x30, x32, &x50);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x51, x33, x35, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x36, x38, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x39, x41, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x42, x44, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x45, x47, &x65);
-{ uint32_t x68; uint8_t _ = _addcarryx_u32(0x0, x66, x48, &x68);
-{ uint32_t _; uint32_t x71 = _mulx_u32(x29, 0xffffffff, &_);
-{ uint32_t x75; uint32_t x74 = _mulx_u32(x71, 0xffffffff, &x75);
-{ uint32_t x78; uint32_t x77 = _mulx_u32(x71, 0xffffffff, &x78);
-{ uint32_t x81; uint32_t x80 = _mulx_u32(x71, 0xffffffff, &x81);
-{ uint32_t x84; uint32_t x83 = _mulx_u32(x71, 0xffffffff, &x84);
-{ uint8_t x85 = (0x0 + 0x0);
-{ uint8_t x86 = (0x0 + 0x0);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(0x0, x75, x77, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x78, x80, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x81, x83, &x94);
-{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x84, &x97);
-{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x29, x71, &_);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x50, x85, &x103);
-{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x53, x86, &x106);
-{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x56, x74, &x109);
-{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x59, x88, &x112);
-{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x62, x91, &x115);
-{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x65, x94, &x118);
-{ uint32_t x121; uint8_t x122 = _addcarryx_u32(x119, x68, x97, &x121);
-{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
-{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
-{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
-{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
-{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x25, &x137);
-{ uint32_t x140; uint32_t x139 = _mulx_u32(x7, x27, &x140);
-{ uint32_t x143; uint32_t x142 = _mulx_u32(x7, x26, &x143);
-{ uint32_t x145; uint8_t x146 = _addcarryx_u32(0x0, x125, x127, &x145);
-{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x128, x130, &x148);
-{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x131, x133, &x151);
-{ uint32_t x154; uint8_t x155 = _addcarryx_u32(x152, x134, x136, &x154);
-{ uint32_t x157; uint8_t x158 = _addcarryx_u32(x155, x137, x139, &x157);
-{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x140, x142, &x160);
-{ uint32_t x163; uint8_t _ = _addcarryx_u32(0x0, x161, x143, &x163);
-{ uint32_t x166; uint8_t x167 = _addcarryx_u32(0x0, x103, x124, &x166);
-{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x106, x145, &x169);
-{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x109, x148, &x172);
-{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x112, x151, &x175);
-{ uint32_t x178; uint8_t x179 = _addcarryx_u32(x176, x115, x154, &x178);
-{ uint32_t x181; uint8_t x182 = _addcarryx_u32(x179, x118, x157, &x181);
-{ uint32_t x184; uint8_t x185 = _addcarryx_u32(x182, x121, x160, &x184);
-{ uint32_t x187; uint8_t x188 = _addcarryx_u32(x185, x122, x163, &x187);
-{ uint32_t _; uint32_t x190 = _mulx_u32(x166, 0xffffffff, &_);
-{ uint32_t x194; uint32_t x193 = _mulx_u32(x190, 0xffffffff, &x194);
-{ uint32_t x197; uint32_t x196 = _mulx_u32(x190, 0xffffffff, &x197);
-{ uint32_t x200; uint32_t x199 = _mulx_u32(x190, 0xffffffff, &x200);
-{ uint32_t x203; uint32_t x202 = _mulx_u32(x190, 0xffffffff, &x203);
-{ uint8_t x204 = (0x0 + 0x0);
-{ uint8_t x205 = (0x0 + 0x0);
-{ uint32_t x207; uint8_t x208 = _addcarryx_u32(0x0, x194, x196, &x207);
-{ uint32_t x210; uint8_t x211 = _addcarryx_u32(x208, x197, x199, &x210);
-{ uint32_t x213; uint8_t x214 = _addcarryx_u32(x211, x200, x202, &x213);
-{ uint32_t x216; uint8_t _ = _addcarryx_u32(0x0, x214, x203, &x216);
-{ uint32_t _; uint8_t x220 = _addcarryx_u32(0x0, x166, x190, &_);
-{ uint32_t x222; uint8_t x223 = _addcarryx_u32(x220, x169, x204, &x222);
-{ uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x172, x205, &x225);
-{ uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x175, x193, &x228);
-{ uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x178, x207, &x231);
-{ uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x181, x210, &x234);
-{ uint32_t x237; uint8_t x238 = _addcarryx_u32(x235, x184, x213, &x237);
-{ uint32_t x240; uint8_t x241 = _addcarryx_u32(x238, x187, x216, &x240);
-{ uint8_t x242 = (x241 + x188);
-{ uint32_t x245; uint32_t x244 = _mulx_u32(x9, x17, &x245);
-{ uint32_t x248; uint32_t x247 = _mulx_u32(x9, x19, &x248);
-{ uint32_t x251; uint32_t x250 = _mulx_u32(x9, x21, &x251);
-{ uint32_t x254; uint32_t x253 = _mulx_u32(x9, x23, &x254);
-{ uint32_t x257; uint32_t x256 = _mulx_u32(x9, x25, &x257);
-{ uint32_t x260; uint32_t x259 = _mulx_u32(x9, x27, &x260);
-{ uint32_t x263; uint32_t x262 = _mulx_u32(x9, x26, &x263);
-{ uint32_t x265; uint8_t x266 = _addcarryx_u32(0x0, x245, x247, &x265);
-{ uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x248, x250, &x268);
-{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x251, x253, &x271);
-{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x254, x256, &x274);
-{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x257, x259, &x277);
-{ uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x260, x262, &x280);
-{ uint32_t x283; uint8_t _ = _addcarryx_u32(0x0, x281, x263, &x283);
-{ uint32_t x286; uint8_t x287 = _addcarryx_u32(0x0, x222, x244, &x286);
-{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x225, x265, &x289);
-{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x228, x268, &x292);
-{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x231, x271, &x295);
-{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x234, x274, &x298);
-{ uint32_t x301; uint8_t x302 = _addcarryx_u32(x299, x237, x277, &x301);
-{ uint32_t x304; uint8_t x305 = _addcarryx_u32(x302, x240, x280, &x304);
-{ uint32_t x307; uint8_t x308 = _addcarryx_u32(x305, x242, x283, &x307);
-{ uint32_t _; uint32_t x310 = _mulx_u32(x286, 0xffffffff, &_);
-{ uint32_t x314; uint32_t x313 = _mulx_u32(x310, 0xffffffff, &x314);
-{ uint32_t x317; uint32_t x316 = _mulx_u32(x310, 0xffffffff, &x317);
-{ uint32_t x320; uint32_t x319 = _mulx_u32(x310, 0xffffffff, &x320);
-{ uint32_t x323; uint32_t x322 = _mulx_u32(x310, 0xffffffff, &x323);
-{ uint8_t x324 = (0x0 + 0x0);
-{ uint8_t x325 = (0x0 + 0x0);
-{ uint32_t x327; uint8_t x328 = _addcarryx_u32(0x0, x314, x316, &x327);
-{ uint32_t x330; uint8_t x331 = _addcarryx_u32(x328, x317, x319, &x330);
-{ uint32_t x333; uint8_t x334 = _addcarryx_u32(x331, x320, x322, &x333);
-{ uint32_t x336; uint8_t _ = _addcarryx_u32(0x0, x334, x323, &x336);
-{ uint32_t _; uint8_t x340 = _addcarryx_u32(0x0, x286, x310, &_);
-{ uint32_t x342; uint8_t x343 = _addcarryx_u32(x340, x289, x324, &x342);
-{ uint32_t x345; uint8_t x346 = _addcarryx_u32(x343, x292, x325, &x345);
-{ uint32_t x348; uint8_t x349 = _addcarryx_u32(x346, x295, x313, &x348);
-{ uint32_t x351; uint8_t x352 = _addcarryx_u32(x349, x298, x327, &x351);
-{ uint32_t x354; uint8_t x355 = _addcarryx_u32(x352, x301, x330, &x354);
-{ uint32_t x357; uint8_t x358 = _addcarryx_u32(x355, x304, x333, &x357);
-{ uint32_t x360; uint8_t x361 = _addcarryx_u32(x358, x307, x336, &x360);
-{ uint8_t x362 = (x361 + x308);
-{ uint32_t x365; uint32_t x364 = _mulx_u32(x11, x17, &x365);
-{ uint32_t x368; uint32_t x367 = _mulx_u32(x11, x19, &x368);
-{ uint32_t x371; uint32_t x370 = _mulx_u32(x11, x21, &x371);
-{ uint32_t x374; uint32_t x373 = _mulx_u32(x11, x23, &x374);
-{ uint32_t x377; uint32_t x376 = _mulx_u32(x11, x25, &x377);
-{ uint32_t x380; uint32_t x379 = _mulx_u32(x11, x27, &x380);
-{ uint32_t x383; uint32_t x382 = _mulx_u32(x11, x26, &x383);
-{ uint32_t x385; uint8_t x386 = _addcarryx_u32(0x0, x365, x367, &x385);
-{ uint32_t x388; uint8_t x389 = _addcarryx_u32(x386, x368, x370, &x388);
-{ uint32_t x391; uint8_t x392 = _addcarryx_u32(x389, x371, x373, &x391);
-{ uint32_t x394; uint8_t x395 = _addcarryx_u32(x392, x374, x376, &x394);
-{ uint32_t x397; uint8_t x398 = _addcarryx_u32(x395, x377, x379, &x397);
-{ uint32_t x400; uint8_t x401 = _addcarryx_u32(x398, x380, x382, &x400);
-{ uint32_t x403; uint8_t _ = _addcarryx_u32(0x0, x401, x383, &x403);
-{ uint32_t x406; uint8_t x407 = _addcarryx_u32(0x0, x342, x364, &x406);
-{ uint32_t x409; uint8_t x410 = _addcarryx_u32(x407, x345, x385, &x409);
-{ uint32_t x412; uint8_t x413 = _addcarryx_u32(x410, x348, x388, &x412);
-{ uint32_t x415; uint8_t x416 = _addcarryx_u32(x413, x351, x391, &x415);
-{ uint32_t x418; uint8_t x419 = _addcarryx_u32(x416, x354, x394, &x418);
-{ uint32_t x421; uint8_t x422 = _addcarryx_u32(x419, x357, x397, &x421);
-{ uint32_t x424; uint8_t x425 = _addcarryx_u32(x422, x360, x400, &x424);
-{ uint32_t x427; uint8_t x428 = _addcarryx_u32(x425, x362, x403, &x427);
-{ uint32_t _; uint32_t x430 = _mulx_u32(x406, 0xffffffff, &_);
-{ uint32_t x434; uint32_t x433 = _mulx_u32(x430, 0xffffffff, &x434);
-{ uint32_t x437; uint32_t x436 = _mulx_u32(x430, 0xffffffff, &x437);
-{ uint32_t x440; uint32_t x439 = _mulx_u32(x430, 0xffffffff, &x440);
-{ uint32_t x443; uint32_t x442 = _mulx_u32(x430, 0xffffffff, &x443);
-{ uint8_t x444 = (0x0 + 0x0);
-{ uint8_t x445 = (0x0 + 0x0);
-{ uint32_t x447; uint8_t x448 = _addcarryx_u32(0x0, x434, x436, &x447);
-{ uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x437, x439, &x450);
-{ uint32_t x453; uint8_t x454 = _addcarryx_u32(x451, x440, x442, &x453);
-{ uint32_t x456; uint8_t _ = _addcarryx_u32(0x0, x454, x443, &x456);
-{ uint32_t _; uint8_t x460 = _addcarryx_u32(0x0, x406, x430, &_);
-{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x409, x444, &x462);
-{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x412, x445, &x465);
-{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x415, x433, &x468);
-{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x418, x447, &x471);
-{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x421, x450, &x474);
-{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x424, x453, &x477);
-{ uint32_t x480; uint8_t x481 = _addcarryx_u32(x478, x427, x456, &x480);
-{ uint8_t x482 = (x481 + x428);
-{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x17, &x485);
-{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x19, &x488);
-{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x21, &x491);
-{ uint32_t x494; uint32_t x493 = _mulx_u32(x13, x23, &x494);
-{ uint32_t x497; uint32_t x496 = _mulx_u32(x13, x25, &x497);
-{ uint32_t x500; uint32_t x499 = _mulx_u32(x13, x27, &x500);
-{ uint32_t x503; uint32_t x502 = _mulx_u32(x13, x26, &x503);
-{ uint32_t x505; uint8_t x506 = _addcarryx_u32(0x0, x485, x487, &x505);
-{ uint32_t x508; uint8_t x509 = _addcarryx_u32(x506, x488, x490, &x508);
-{ uint32_t x511; uint8_t x512 = _addcarryx_u32(x509, x491, x493, &x511);
-{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x494, x496, &x514);
-{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x497, x499, &x517);
-{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x500, x502, &x520);
-{ uint32_t x523; uint8_t _ = _addcarryx_u32(0x0, x521, x503, &x523);
-{ uint32_t x526; uint8_t x527 = _addcarryx_u32(0x0, x462, x484, &x526);
-{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x465, x505, &x529);
-{ uint32_t x532; uint8_t x533 = _addcarryx_u32(x530, x468, x508, &x532);
-{ uint32_t x535; uint8_t x536 = _addcarryx_u32(x533, x471, x511, &x535);
-{ uint32_t x538; uint8_t x539 = _addcarryx_u32(x536, x474, x514, &x538);
-{ uint32_t x541; uint8_t x542 = _addcarryx_u32(x539, x477, x517, &x541);
-{ uint32_t x544; uint8_t x545 = _addcarryx_u32(x542, x480, x520, &x544);
-{ uint32_t x547; uint8_t x548 = _addcarryx_u32(x545, x482, x523, &x547);
-{ uint32_t _; uint32_t x550 = _mulx_u32(x526, 0xffffffff, &_);
-{ uint32_t x554; uint32_t x553 = _mulx_u32(x550, 0xffffffff, &x554);
-{ uint32_t x557; uint32_t x556 = _mulx_u32(x550, 0xffffffff, &x557);
-{ uint32_t x560; uint32_t x559 = _mulx_u32(x550, 0xffffffff, &x560);
-{ uint32_t x563; uint32_t x562 = _mulx_u32(x550, 0xffffffff, &x563);
-{ uint8_t x564 = (0x0 + 0x0);
-{ uint8_t x565 = (0x0 + 0x0);
-{ uint32_t x567; uint8_t x568 = _addcarryx_u32(0x0, x554, x556, &x567);
-{ uint32_t x570; uint8_t x571 = _addcarryx_u32(x568, x557, x559, &x570);
-{ uint32_t x573; uint8_t x574 = _addcarryx_u32(x571, x560, x562, &x573);
-{ uint32_t x576; uint8_t _ = _addcarryx_u32(0x0, x574, x563, &x576);
-{ uint32_t _; uint8_t x580 = _addcarryx_u32(0x0, x526, x550, &_);
-{ uint32_t x582; uint8_t x583 = _addcarryx_u32(x580, x529, x564, &x582);
-{ uint32_t x585; uint8_t x586 = _addcarryx_u32(x583, x532, x565, &x585);
-{ uint32_t x588; uint8_t x589 = _addcarryx_u32(x586, x535, x553, &x588);
-{ uint32_t x591; uint8_t x592 = _addcarryx_u32(x589, x538, x567, &x591);
-{ uint32_t x594; uint8_t x595 = _addcarryx_u32(x592, x541, x570, &x594);
-{ uint32_t x597; uint8_t x598 = _addcarryx_u32(x595, x544, x573, &x597);
-{ uint32_t x600; uint8_t x601 = _addcarryx_u32(x598, x547, x576, &x600);
-{ uint8_t x602 = (x601 + x548);
-{ uint32_t x605; uint32_t x604 = _mulx_u32(x15, x17, &x605);
-{ uint32_t x608; uint32_t x607 = _mulx_u32(x15, x19, &x608);
-{ uint32_t x611; uint32_t x610 = _mulx_u32(x15, x21, &x611);
-{ uint32_t x614; uint32_t x613 = _mulx_u32(x15, x23, &x614);
-{ uint32_t x617; uint32_t x616 = _mulx_u32(x15, x25, &x617);
-{ uint32_t x620; uint32_t x619 = _mulx_u32(x15, x27, &x620);
-{ uint32_t x623; uint32_t x622 = _mulx_u32(x15, x26, &x623);
-{ uint32_t x625; uint8_t x626 = _addcarryx_u32(0x0, x605, x607, &x625);
-{ uint32_t x628; uint8_t x629 = _addcarryx_u32(x626, x608, x610, &x628);
-{ uint32_t x631; uint8_t x632 = _addcarryx_u32(x629, x611, x613, &x631);
-{ uint32_t x634; uint8_t x635 = _addcarryx_u32(x632, x614, x616, &x634);
-{ uint32_t x637; uint8_t x638 = _addcarryx_u32(x635, x617, x619, &x637);
-{ uint32_t x640; uint8_t x641 = _addcarryx_u32(x638, x620, x622, &x640);
-{ uint32_t x643; uint8_t _ = _addcarryx_u32(0x0, x641, x623, &x643);
-{ uint32_t x646; uint8_t x647 = _addcarryx_u32(0x0, x582, x604, &x646);
-{ uint32_t x649; uint8_t x650 = _addcarryx_u32(x647, x585, x625, &x649);
-{ uint32_t x652; uint8_t x653 = _addcarryx_u32(x650, x588, x628, &x652);
-{ uint32_t x655; uint8_t x656 = _addcarryx_u32(x653, x591, x631, &x655);
-{ uint32_t x658; uint8_t x659 = _addcarryx_u32(x656, x594, x634, &x658);
-{ uint32_t x661; uint8_t x662 = _addcarryx_u32(x659, x597, x637, &x661);
-{ uint32_t x664; uint8_t x665 = _addcarryx_u32(x662, x600, x640, &x664);
-{ uint32_t x667; uint8_t x668 = _addcarryx_u32(x665, x602, x643, &x667);
-{ uint32_t _; uint32_t x670 = _mulx_u32(x646, 0xffffffff, &_);
-{ uint32_t x674; uint32_t x673 = _mulx_u32(x670, 0xffffffff, &x674);
-{ uint32_t x677; uint32_t x676 = _mulx_u32(x670, 0xffffffff, &x677);
-{ uint32_t x680; uint32_t x679 = _mulx_u32(x670, 0xffffffff, &x680);
-{ uint32_t x683; uint32_t x682 = _mulx_u32(x670, 0xffffffff, &x683);
-{ uint8_t x684 = (0x0 + 0x0);
-{ uint8_t x685 = (0x0 + 0x0);
-{ uint32_t x687; uint8_t x688 = _addcarryx_u32(0x0, x674, x676, &x687);
-{ uint32_t x690; uint8_t x691 = _addcarryx_u32(x688, x677, x679, &x690);
-{ uint32_t x693; uint8_t x694 = _addcarryx_u32(x691, x680, x682, &x693);
-{ uint32_t x696; uint8_t _ = _addcarryx_u32(0x0, x694, x683, &x696);
-{ uint32_t _; uint8_t x700 = _addcarryx_u32(0x0, x646, x670, &_);
-{ uint32_t x702; uint8_t x703 = _addcarryx_u32(x700, x649, x684, &x702);
-{ uint32_t x705; uint8_t x706 = _addcarryx_u32(x703, x652, x685, &x705);
-{ uint32_t x708; uint8_t x709 = _addcarryx_u32(x706, x655, x673, &x708);
-{ uint32_t x711; uint8_t x712 = _addcarryx_u32(x709, x658, x687, &x711);
-{ uint32_t x714; uint8_t x715 = _addcarryx_u32(x712, x661, x690, &x714);
-{ uint32_t x717; uint8_t x718 = _addcarryx_u32(x715, x664, x693, &x717);
-{ uint32_t x720; uint8_t x721 = _addcarryx_u32(x718, x667, x696, &x720);
-{ uint8_t x722 = (x721 + x668);
-{ uint32_t x725; uint32_t x724 = _mulx_u32(x14, x17, &x725);
-{ uint32_t x728; uint32_t x727 = _mulx_u32(x14, x19, &x728);
-{ uint32_t x731; uint32_t x730 = _mulx_u32(x14, x21, &x731);
-{ uint32_t x734; uint32_t x733 = _mulx_u32(x14, x23, &x734);
-{ uint32_t x737; uint32_t x736 = _mulx_u32(x14, x25, &x737);
-{ uint32_t x740; uint32_t x739 = _mulx_u32(x14, x27, &x740);
-{ uint32_t x743; uint32_t x742 = _mulx_u32(x14, x26, &x743);
-{ uint32_t x745; uint8_t x746 = _addcarryx_u32(0x0, x725, x727, &x745);
-{ uint32_t x748; uint8_t x749 = _addcarryx_u32(x746, x728, x730, &x748);
-{ uint32_t x751; uint8_t x752 = _addcarryx_u32(x749, x731, x733, &x751);
-{ uint32_t x754; uint8_t x755 = _addcarryx_u32(x752, x734, x736, &x754);
-{ uint32_t x757; uint8_t x758 = _addcarryx_u32(x755, x737, x739, &x757);
-{ uint32_t x760; uint8_t x761 = _addcarryx_u32(x758, x740, x742, &x760);
-{ uint32_t x763; uint8_t _ = _addcarryx_u32(0x0, x761, x743, &x763);
-{ uint32_t x766; uint8_t x767 = _addcarryx_u32(0x0, x702, x724, &x766);
-{ uint32_t x769; uint8_t x770 = _addcarryx_u32(x767, x705, x745, &x769);
-{ uint32_t x772; uint8_t x773 = _addcarryx_u32(x770, x708, x748, &x772);
-{ uint32_t x775; uint8_t x776 = _addcarryx_u32(x773, x711, x751, &x775);
-{ uint32_t x778; uint8_t x779 = _addcarryx_u32(x776, x714, x754, &x778);
-{ uint32_t x781; uint8_t x782 = _addcarryx_u32(x779, x717, x757, &x781);
-{ uint32_t x784; uint8_t x785 = _addcarryx_u32(x782, x720, x760, &x784);
-{ uint32_t x787; uint8_t x788 = _addcarryx_u32(x785, x722, x763, &x787);
-{ uint32_t _; uint32_t x790 = _mulx_u32(x766, 0xffffffff, &_);
-{ uint32_t x794; uint32_t x793 = _mulx_u32(x790, 0xffffffff, &x794);
-{ uint32_t x797; uint32_t x796 = _mulx_u32(x790, 0xffffffff, &x797);
-{ uint32_t x800; uint32_t x799 = _mulx_u32(x790, 0xffffffff, &x800);
-{ uint32_t x803; uint32_t x802 = _mulx_u32(x790, 0xffffffff, &x803);
-{ uint8_t x804 = (0x0 + 0x0);
-{ uint8_t x805 = (0x0 + 0x0);
-{ uint32_t x807; uint8_t x808 = _addcarryx_u32(0x0, x794, x796, &x807);
-{ uint32_t x810; uint8_t x811 = _addcarryx_u32(x808, x797, x799, &x810);
-{ uint32_t x813; uint8_t x814 = _addcarryx_u32(x811, x800, x802, &x813);
-{ uint32_t x816; uint8_t _ = _addcarryx_u32(0x0, x814, x803, &x816);
-{ uint32_t _; uint8_t x820 = _addcarryx_u32(0x0, x766, x790, &_);
-{ uint32_t x822; uint8_t x823 = _addcarryx_u32(x820, x769, x804, &x822);
-{ uint32_t x825; uint8_t x826 = _addcarryx_u32(x823, x772, x805, &x825);
-{ uint32_t x828; uint8_t x829 = _addcarryx_u32(x826, x775, x793, &x828);
-{ uint32_t x831; uint8_t x832 = _addcarryx_u32(x829, x778, x807, &x831);
-{ uint32_t x834; uint8_t x835 = _addcarryx_u32(x832, x781, x810, &x834);
-{ uint32_t x837; uint8_t x838 = _addcarryx_u32(x835, x784, x813, &x837);
-{ uint32_t x840; uint8_t x841 = _addcarryx_u32(x838, x787, x816, &x840);
-{ uint8_t x842 = (x841 + x788);
-{ uint32_t x844; uint8_t x845 = _subborrow_u32(0x0, x822, 0x1, &x844);
-{ uint32_t x847; uint8_t x848 = _subborrow_u32(x845, x825, 0x0, &x847);
-{ uint32_t x850; uint8_t x851 = _subborrow_u32(x848, x828, 0x0, &x850);
-{ uint32_t x853; uint8_t x854 = _subborrow_u32(x851, x831, 0xffffffff, &x853);
-{ uint32_t x856; uint8_t x857 = _subborrow_u32(x854, x834, 0xffffffff, &x856);
-{ uint32_t x859; uint8_t x860 = _subborrow_u32(x857, x837, 0xffffffff, &x859);
-{ uint32_t x862; uint8_t x863 = _subborrow_u32(x860, x840, 0xffffffff, &x862);
-{ uint32_t _; uint8_t x866 = _subborrow_u32(x863, x842, 0x0, &_);
-{ uint32_t x867 = cmovznz(x866, x862, x840);
-{ uint32_t x868 = cmovznz(x866, x859, x837);
-{ uint32_t x869 = cmovznz(x866, x856, x834);
-{ uint32_t x870 = cmovznz(x866, x853, x831);
-{ uint32_t x871 = cmovznz(x866, x850, x828);
-{ uint32_t x872 = cmovznz(x866, x847, x825);
-{ uint32_t x873 = cmovznz(x866, x844, x822);
-out[0] = x867;
-out[1] = x868;
-out[2] = x869;
-out[3] = x870;
-out[4] = x871;
-out[5] = x872;
-out[6] = x873;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void femul(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint32_t x30; uint32_t x29 = _mulx_u32(x5, x17, &x30);
+ { uint32_t x33; uint32_t x32 = _mulx_u32(x5, x19, &x33);
+ { uint32_t x36; uint32_t x35 = _mulx_u32(x5, x21, &x36);
+ { uint32_t x39; uint32_t x38 = _mulx_u32(x5, x23, &x39);
+ { uint32_t x42; uint32_t x41 = _mulx_u32(x5, x25, &x42);
+ { uint32_t x45; uint32_t x44 = _mulx_u32(x5, x27, &x45);
+ { uint32_t x48; uint32_t x47 = _mulx_u32(x5, x26, &x48);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(0x0, x30, x32, &x50);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x51, x33, x35, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x36, x38, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x39, x41, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x42, x44, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x45, x47, &x65);
+ { uint32_t x68; uint8_t _ = _addcarryx_u32(0x0, x66, x48, &x68);
+ { uint32_t _; uint32_t x71 = _mulx_u32(x29, 0xffffffff, &_);
+ { uint32_t x75; uint32_t x74 = _mulx_u32(x71, 0xffffffff, &x75);
+ { uint32_t x78; uint32_t x77 = _mulx_u32(x71, 0xffffffff, &x78);
+ { uint32_t x81; uint32_t x80 = _mulx_u32(x71, 0xffffffff, &x81);
+ { uint32_t x84; uint32_t x83 = _mulx_u32(x71, 0xffffffff, &x84);
+ { uint8_t x85 = (0x0 + 0x0);
+ { uint8_t x86 = (0x0 + 0x0);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(0x0, x75, x77, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x78, x80, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x81, x83, &x94);
+ { uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x84, &x97);
+ { uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x29, x71, &_);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x50, x85, &x103);
+ { uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x53, x86, &x106);
+ { uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x56, x74, &x109);
+ { uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x59, x88, &x112);
+ { uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x62, x91, &x115);
+ { uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x65, x94, &x118);
+ { uint32_t x121; uint8_t x122 = _addcarryx_u32(x119, x68, x97, &x121);
+ { uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+ { uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+ { uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+ { uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+ { uint32_t x137; uint32_t x136 = _mulx_u32(x7, x25, &x137);
+ { uint32_t x140; uint32_t x139 = _mulx_u32(x7, x27, &x140);
+ { uint32_t x143; uint32_t x142 = _mulx_u32(x7, x26, &x143);
+ { uint32_t x145; uint8_t x146 = _addcarryx_u32(0x0, x125, x127, &x145);
+ { uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x128, x130, &x148);
+ { uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x131, x133, &x151);
+ { uint32_t x154; uint8_t x155 = _addcarryx_u32(x152, x134, x136, &x154);
+ { uint32_t x157; uint8_t x158 = _addcarryx_u32(x155, x137, x139, &x157);
+ { uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x140, x142, &x160);
+ { uint32_t x163; uint8_t _ = _addcarryx_u32(0x0, x161, x143, &x163);
+ { uint32_t x166; uint8_t x167 = _addcarryx_u32(0x0, x103, x124, &x166);
+ { uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x106, x145, &x169);
+ { uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x109, x148, &x172);
+ { uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x112, x151, &x175);
+ { uint32_t x178; uint8_t x179 = _addcarryx_u32(x176, x115, x154, &x178);
+ { uint32_t x181; uint8_t x182 = _addcarryx_u32(x179, x118, x157, &x181);
+ { uint32_t x184; uint8_t x185 = _addcarryx_u32(x182, x121, x160, &x184);
+ { uint32_t x187; uint8_t x188 = _addcarryx_u32(x185, x122, x163, &x187);
+ { uint32_t _; uint32_t x190 = _mulx_u32(x166, 0xffffffff, &_);
+ { uint32_t x194; uint32_t x193 = _mulx_u32(x190, 0xffffffff, &x194);
+ { uint32_t x197; uint32_t x196 = _mulx_u32(x190, 0xffffffff, &x197);
+ { uint32_t x200; uint32_t x199 = _mulx_u32(x190, 0xffffffff, &x200);
+ { uint32_t x203; uint32_t x202 = _mulx_u32(x190, 0xffffffff, &x203);
+ { uint8_t x204 = (0x0 + 0x0);
+ { uint8_t x205 = (0x0 + 0x0);
+ { uint32_t x207; uint8_t x208 = _addcarryx_u32(0x0, x194, x196, &x207);
+ { uint32_t x210; uint8_t x211 = _addcarryx_u32(x208, x197, x199, &x210);
+ { uint32_t x213; uint8_t x214 = _addcarryx_u32(x211, x200, x202, &x213);
+ { uint32_t x216; uint8_t _ = _addcarryx_u32(0x0, x214, x203, &x216);
+ { uint32_t _; uint8_t x220 = _addcarryx_u32(0x0, x166, x190, &_);
+ { uint32_t x222; uint8_t x223 = _addcarryx_u32(x220, x169, x204, &x222);
+ { uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x172, x205, &x225);
+ { uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x175, x193, &x228);
+ { uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x178, x207, &x231);
+ { uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x181, x210, &x234);
+ { uint32_t x237; uint8_t x238 = _addcarryx_u32(x235, x184, x213, &x237);
+ { uint32_t x240; uint8_t x241 = _addcarryx_u32(x238, x187, x216, &x240);
+ { uint8_t x242 = (x241 + x188);
+ { uint32_t x245; uint32_t x244 = _mulx_u32(x9, x17, &x245);
+ { uint32_t x248; uint32_t x247 = _mulx_u32(x9, x19, &x248);
+ { uint32_t x251; uint32_t x250 = _mulx_u32(x9, x21, &x251);
+ { uint32_t x254; uint32_t x253 = _mulx_u32(x9, x23, &x254);
+ { uint32_t x257; uint32_t x256 = _mulx_u32(x9, x25, &x257);
+ { uint32_t x260; uint32_t x259 = _mulx_u32(x9, x27, &x260);
+ { uint32_t x263; uint32_t x262 = _mulx_u32(x9, x26, &x263);
+ { uint32_t x265; uint8_t x266 = _addcarryx_u32(0x0, x245, x247, &x265);
+ { uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x248, x250, &x268);
+ { uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x251, x253, &x271);
+ { uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x254, x256, &x274);
+ { uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x257, x259, &x277);
+ { uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x260, x262, &x280);
+ { uint32_t x283; uint8_t _ = _addcarryx_u32(0x0, x281, x263, &x283);
+ { uint32_t x286; uint8_t x287 = _addcarryx_u32(0x0, x222, x244, &x286);
+ { uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x225, x265, &x289);
+ { uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x228, x268, &x292);
+ { uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x231, x271, &x295);
+ { uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x234, x274, &x298);
+ { uint32_t x301; uint8_t x302 = _addcarryx_u32(x299, x237, x277, &x301);
+ { uint32_t x304; uint8_t x305 = _addcarryx_u32(x302, x240, x280, &x304);
+ { uint32_t x307; uint8_t x308 = _addcarryx_u32(x305, x242, x283, &x307);
+ { uint32_t _; uint32_t x310 = _mulx_u32(x286, 0xffffffff, &_);
+ { uint32_t x314; uint32_t x313 = _mulx_u32(x310, 0xffffffff, &x314);
+ { uint32_t x317; uint32_t x316 = _mulx_u32(x310, 0xffffffff, &x317);
+ { uint32_t x320; uint32_t x319 = _mulx_u32(x310, 0xffffffff, &x320);
+ { uint32_t x323; uint32_t x322 = _mulx_u32(x310, 0xffffffff, &x323);
+ { uint8_t x324 = (0x0 + 0x0);
+ { uint8_t x325 = (0x0 + 0x0);
+ { uint32_t x327; uint8_t x328 = _addcarryx_u32(0x0, x314, x316, &x327);
+ { uint32_t x330; uint8_t x331 = _addcarryx_u32(x328, x317, x319, &x330);
+ { uint32_t x333; uint8_t x334 = _addcarryx_u32(x331, x320, x322, &x333);
+ { uint32_t x336; uint8_t _ = _addcarryx_u32(0x0, x334, x323, &x336);
+ { uint32_t _; uint8_t x340 = _addcarryx_u32(0x0, x286, x310, &_);
+ { uint32_t x342; uint8_t x343 = _addcarryx_u32(x340, x289, x324, &x342);
+ { uint32_t x345; uint8_t x346 = _addcarryx_u32(x343, x292, x325, &x345);
+ { uint32_t x348; uint8_t x349 = _addcarryx_u32(x346, x295, x313, &x348);
+ { uint32_t x351; uint8_t x352 = _addcarryx_u32(x349, x298, x327, &x351);
+ { uint32_t x354; uint8_t x355 = _addcarryx_u32(x352, x301, x330, &x354);
+ { uint32_t x357; uint8_t x358 = _addcarryx_u32(x355, x304, x333, &x357);
+ { uint32_t x360; uint8_t x361 = _addcarryx_u32(x358, x307, x336, &x360);
+ { uint8_t x362 = (x361 + x308);
+ { uint32_t x365; uint32_t x364 = _mulx_u32(x11, x17, &x365);
+ { uint32_t x368; uint32_t x367 = _mulx_u32(x11, x19, &x368);
+ { uint32_t x371; uint32_t x370 = _mulx_u32(x11, x21, &x371);
+ { uint32_t x374; uint32_t x373 = _mulx_u32(x11, x23, &x374);
+ { uint32_t x377; uint32_t x376 = _mulx_u32(x11, x25, &x377);
+ { uint32_t x380; uint32_t x379 = _mulx_u32(x11, x27, &x380);
+ { uint32_t x383; uint32_t x382 = _mulx_u32(x11, x26, &x383);
+ { uint32_t x385; uint8_t x386 = _addcarryx_u32(0x0, x365, x367, &x385);
+ { uint32_t x388; uint8_t x389 = _addcarryx_u32(x386, x368, x370, &x388);
+ { uint32_t x391; uint8_t x392 = _addcarryx_u32(x389, x371, x373, &x391);
+ { uint32_t x394; uint8_t x395 = _addcarryx_u32(x392, x374, x376, &x394);
+ { uint32_t x397; uint8_t x398 = _addcarryx_u32(x395, x377, x379, &x397);
+ { uint32_t x400; uint8_t x401 = _addcarryx_u32(x398, x380, x382, &x400);
+ { uint32_t x403; uint8_t _ = _addcarryx_u32(0x0, x401, x383, &x403);
+ { uint32_t x406; uint8_t x407 = _addcarryx_u32(0x0, x342, x364, &x406);
+ { uint32_t x409; uint8_t x410 = _addcarryx_u32(x407, x345, x385, &x409);
+ { uint32_t x412; uint8_t x413 = _addcarryx_u32(x410, x348, x388, &x412);
+ { uint32_t x415; uint8_t x416 = _addcarryx_u32(x413, x351, x391, &x415);
+ { uint32_t x418; uint8_t x419 = _addcarryx_u32(x416, x354, x394, &x418);
+ { uint32_t x421; uint8_t x422 = _addcarryx_u32(x419, x357, x397, &x421);
+ { uint32_t x424; uint8_t x425 = _addcarryx_u32(x422, x360, x400, &x424);
+ { uint32_t x427; uint8_t x428 = _addcarryx_u32(x425, x362, x403, &x427);
+ { uint32_t _; uint32_t x430 = _mulx_u32(x406, 0xffffffff, &_);
+ { uint32_t x434; uint32_t x433 = _mulx_u32(x430, 0xffffffff, &x434);
+ { uint32_t x437; uint32_t x436 = _mulx_u32(x430, 0xffffffff, &x437);
+ { uint32_t x440; uint32_t x439 = _mulx_u32(x430, 0xffffffff, &x440);
+ { uint32_t x443; uint32_t x442 = _mulx_u32(x430, 0xffffffff, &x443);
+ { uint8_t x444 = (0x0 + 0x0);
+ { uint8_t x445 = (0x0 + 0x0);
+ { uint32_t x447; uint8_t x448 = _addcarryx_u32(0x0, x434, x436, &x447);
+ { uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x437, x439, &x450);
+ { uint32_t x453; uint8_t x454 = _addcarryx_u32(x451, x440, x442, &x453);
+ { uint32_t x456; uint8_t _ = _addcarryx_u32(0x0, x454, x443, &x456);
+ { uint32_t _; uint8_t x460 = _addcarryx_u32(0x0, x406, x430, &_);
+ { uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x409, x444, &x462);
+ { uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x412, x445, &x465);
+ { uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x415, x433, &x468);
+ { uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x418, x447, &x471);
+ { uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x421, x450, &x474);
+ { uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x424, x453, &x477);
+ { uint32_t x480; uint8_t x481 = _addcarryx_u32(x478, x427, x456, &x480);
+ { uint8_t x482 = (x481 + x428);
+ { uint32_t x485; uint32_t x484 = _mulx_u32(x13, x17, &x485);
+ { uint32_t x488; uint32_t x487 = _mulx_u32(x13, x19, &x488);
+ { uint32_t x491; uint32_t x490 = _mulx_u32(x13, x21, &x491);
+ { uint32_t x494; uint32_t x493 = _mulx_u32(x13, x23, &x494);
+ { uint32_t x497; uint32_t x496 = _mulx_u32(x13, x25, &x497);
+ { uint32_t x500; uint32_t x499 = _mulx_u32(x13, x27, &x500);
+ { uint32_t x503; uint32_t x502 = _mulx_u32(x13, x26, &x503);
+ { uint32_t x505; uint8_t x506 = _addcarryx_u32(0x0, x485, x487, &x505);
+ { uint32_t x508; uint8_t x509 = _addcarryx_u32(x506, x488, x490, &x508);
+ { uint32_t x511; uint8_t x512 = _addcarryx_u32(x509, x491, x493, &x511);
+ { uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x494, x496, &x514);
+ { uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x497, x499, &x517);
+ { uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x500, x502, &x520);
+ { uint32_t x523; uint8_t _ = _addcarryx_u32(0x0, x521, x503, &x523);
+ { uint32_t x526; uint8_t x527 = _addcarryx_u32(0x0, x462, x484, &x526);
+ { uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x465, x505, &x529);
+ { uint32_t x532; uint8_t x533 = _addcarryx_u32(x530, x468, x508, &x532);
+ { uint32_t x535; uint8_t x536 = _addcarryx_u32(x533, x471, x511, &x535);
+ { uint32_t x538; uint8_t x539 = _addcarryx_u32(x536, x474, x514, &x538);
+ { uint32_t x541; uint8_t x542 = _addcarryx_u32(x539, x477, x517, &x541);
+ { uint32_t x544; uint8_t x545 = _addcarryx_u32(x542, x480, x520, &x544);
+ { uint32_t x547; uint8_t x548 = _addcarryx_u32(x545, x482, x523, &x547);
+ { uint32_t _; uint32_t x550 = _mulx_u32(x526, 0xffffffff, &_);
+ { uint32_t x554; uint32_t x553 = _mulx_u32(x550, 0xffffffff, &x554);
+ { uint32_t x557; uint32_t x556 = _mulx_u32(x550, 0xffffffff, &x557);
+ { uint32_t x560; uint32_t x559 = _mulx_u32(x550, 0xffffffff, &x560);
+ { uint32_t x563; uint32_t x562 = _mulx_u32(x550, 0xffffffff, &x563);
+ { uint8_t x564 = (0x0 + 0x0);
+ { uint8_t x565 = (0x0 + 0x0);
+ { uint32_t x567; uint8_t x568 = _addcarryx_u32(0x0, x554, x556, &x567);
+ { uint32_t x570; uint8_t x571 = _addcarryx_u32(x568, x557, x559, &x570);
+ { uint32_t x573; uint8_t x574 = _addcarryx_u32(x571, x560, x562, &x573);
+ { uint32_t x576; uint8_t _ = _addcarryx_u32(0x0, x574, x563, &x576);
+ { uint32_t _; uint8_t x580 = _addcarryx_u32(0x0, x526, x550, &_);
+ { uint32_t x582; uint8_t x583 = _addcarryx_u32(x580, x529, x564, &x582);
+ { uint32_t x585; uint8_t x586 = _addcarryx_u32(x583, x532, x565, &x585);
+ { uint32_t x588; uint8_t x589 = _addcarryx_u32(x586, x535, x553, &x588);
+ { uint32_t x591; uint8_t x592 = _addcarryx_u32(x589, x538, x567, &x591);
+ { uint32_t x594; uint8_t x595 = _addcarryx_u32(x592, x541, x570, &x594);
+ { uint32_t x597; uint8_t x598 = _addcarryx_u32(x595, x544, x573, &x597);
+ { uint32_t x600; uint8_t x601 = _addcarryx_u32(x598, x547, x576, &x600);
+ { uint8_t x602 = (x601 + x548);
+ { uint32_t x605; uint32_t x604 = _mulx_u32(x15, x17, &x605);
+ { uint32_t x608; uint32_t x607 = _mulx_u32(x15, x19, &x608);
+ { uint32_t x611; uint32_t x610 = _mulx_u32(x15, x21, &x611);
+ { uint32_t x614; uint32_t x613 = _mulx_u32(x15, x23, &x614);
+ { uint32_t x617; uint32_t x616 = _mulx_u32(x15, x25, &x617);
+ { uint32_t x620; uint32_t x619 = _mulx_u32(x15, x27, &x620);
+ { uint32_t x623; uint32_t x622 = _mulx_u32(x15, x26, &x623);
+ { uint32_t x625; uint8_t x626 = _addcarryx_u32(0x0, x605, x607, &x625);
+ { uint32_t x628; uint8_t x629 = _addcarryx_u32(x626, x608, x610, &x628);
+ { uint32_t x631; uint8_t x632 = _addcarryx_u32(x629, x611, x613, &x631);
+ { uint32_t x634; uint8_t x635 = _addcarryx_u32(x632, x614, x616, &x634);
+ { uint32_t x637; uint8_t x638 = _addcarryx_u32(x635, x617, x619, &x637);
+ { uint32_t x640; uint8_t x641 = _addcarryx_u32(x638, x620, x622, &x640);
+ { uint32_t x643; uint8_t _ = _addcarryx_u32(0x0, x641, x623, &x643);
+ { uint32_t x646; uint8_t x647 = _addcarryx_u32(0x0, x582, x604, &x646);
+ { uint32_t x649; uint8_t x650 = _addcarryx_u32(x647, x585, x625, &x649);
+ { uint32_t x652; uint8_t x653 = _addcarryx_u32(x650, x588, x628, &x652);
+ { uint32_t x655; uint8_t x656 = _addcarryx_u32(x653, x591, x631, &x655);
+ { uint32_t x658; uint8_t x659 = _addcarryx_u32(x656, x594, x634, &x658);
+ { uint32_t x661; uint8_t x662 = _addcarryx_u32(x659, x597, x637, &x661);
+ { uint32_t x664; uint8_t x665 = _addcarryx_u32(x662, x600, x640, &x664);
+ { uint32_t x667; uint8_t x668 = _addcarryx_u32(x665, x602, x643, &x667);
+ { uint32_t _; uint32_t x670 = _mulx_u32(x646, 0xffffffff, &_);
+ { uint32_t x674; uint32_t x673 = _mulx_u32(x670, 0xffffffff, &x674);
+ { uint32_t x677; uint32_t x676 = _mulx_u32(x670, 0xffffffff, &x677);
+ { uint32_t x680; uint32_t x679 = _mulx_u32(x670, 0xffffffff, &x680);
+ { uint32_t x683; uint32_t x682 = _mulx_u32(x670, 0xffffffff, &x683);
+ { uint8_t x684 = (0x0 + 0x0);
+ { uint8_t x685 = (0x0 + 0x0);
+ { uint32_t x687; uint8_t x688 = _addcarryx_u32(0x0, x674, x676, &x687);
+ { uint32_t x690; uint8_t x691 = _addcarryx_u32(x688, x677, x679, &x690);
+ { uint32_t x693; uint8_t x694 = _addcarryx_u32(x691, x680, x682, &x693);
+ { uint32_t x696; uint8_t _ = _addcarryx_u32(0x0, x694, x683, &x696);
+ { uint32_t _; uint8_t x700 = _addcarryx_u32(0x0, x646, x670, &_);
+ { uint32_t x702; uint8_t x703 = _addcarryx_u32(x700, x649, x684, &x702);
+ { uint32_t x705; uint8_t x706 = _addcarryx_u32(x703, x652, x685, &x705);
+ { uint32_t x708; uint8_t x709 = _addcarryx_u32(x706, x655, x673, &x708);
+ { uint32_t x711; uint8_t x712 = _addcarryx_u32(x709, x658, x687, &x711);
+ { uint32_t x714; uint8_t x715 = _addcarryx_u32(x712, x661, x690, &x714);
+ { uint32_t x717; uint8_t x718 = _addcarryx_u32(x715, x664, x693, &x717);
+ { uint32_t x720; uint8_t x721 = _addcarryx_u32(x718, x667, x696, &x720);
+ { uint8_t x722 = (x721 + x668);
+ { uint32_t x725; uint32_t x724 = _mulx_u32(x14, x17, &x725);
+ { uint32_t x728; uint32_t x727 = _mulx_u32(x14, x19, &x728);
+ { uint32_t x731; uint32_t x730 = _mulx_u32(x14, x21, &x731);
+ { uint32_t x734; uint32_t x733 = _mulx_u32(x14, x23, &x734);
+ { uint32_t x737; uint32_t x736 = _mulx_u32(x14, x25, &x737);
+ { uint32_t x740; uint32_t x739 = _mulx_u32(x14, x27, &x740);
+ { uint32_t x743; uint32_t x742 = _mulx_u32(x14, x26, &x743);
+ { uint32_t x745; uint8_t x746 = _addcarryx_u32(0x0, x725, x727, &x745);
+ { uint32_t x748; uint8_t x749 = _addcarryx_u32(x746, x728, x730, &x748);
+ { uint32_t x751; uint8_t x752 = _addcarryx_u32(x749, x731, x733, &x751);
+ { uint32_t x754; uint8_t x755 = _addcarryx_u32(x752, x734, x736, &x754);
+ { uint32_t x757; uint8_t x758 = _addcarryx_u32(x755, x737, x739, &x757);
+ { uint32_t x760; uint8_t x761 = _addcarryx_u32(x758, x740, x742, &x760);
+ { uint32_t x763; uint8_t _ = _addcarryx_u32(0x0, x761, x743, &x763);
+ { uint32_t x766; uint8_t x767 = _addcarryx_u32(0x0, x702, x724, &x766);
+ { uint32_t x769; uint8_t x770 = _addcarryx_u32(x767, x705, x745, &x769);
+ { uint32_t x772; uint8_t x773 = _addcarryx_u32(x770, x708, x748, &x772);
+ { uint32_t x775; uint8_t x776 = _addcarryx_u32(x773, x711, x751, &x775);
+ { uint32_t x778; uint8_t x779 = _addcarryx_u32(x776, x714, x754, &x778);
+ { uint32_t x781; uint8_t x782 = _addcarryx_u32(x779, x717, x757, &x781);
+ { uint32_t x784; uint8_t x785 = _addcarryx_u32(x782, x720, x760, &x784);
+ { uint32_t x787; uint8_t x788 = _addcarryx_u32(x785, x722, x763, &x787);
+ { uint32_t _; uint32_t x790 = _mulx_u32(x766, 0xffffffff, &_);
+ { uint32_t x794; uint32_t x793 = _mulx_u32(x790, 0xffffffff, &x794);
+ { uint32_t x797; uint32_t x796 = _mulx_u32(x790, 0xffffffff, &x797);
+ { uint32_t x800; uint32_t x799 = _mulx_u32(x790, 0xffffffff, &x800);
+ { uint32_t x803; uint32_t x802 = _mulx_u32(x790, 0xffffffff, &x803);
+ { uint8_t x804 = (0x0 + 0x0);
+ { uint8_t x805 = (0x0 + 0x0);
+ { uint32_t x807; uint8_t x808 = _addcarryx_u32(0x0, x794, x796, &x807);
+ { uint32_t x810; uint8_t x811 = _addcarryx_u32(x808, x797, x799, &x810);
+ { uint32_t x813; uint8_t x814 = _addcarryx_u32(x811, x800, x802, &x813);
+ { uint32_t x816; uint8_t _ = _addcarryx_u32(0x0, x814, x803, &x816);
+ { uint32_t _; uint8_t x820 = _addcarryx_u32(0x0, x766, x790, &_);
+ { uint32_t x822; uint8_t x823 = _addcarryx_u32(x820, x769, x804, &x822);
+ { uint32_t x825; uint8_t x826 = _addcarryx_u32(x823, x772, x805, &x825);
+ { uint32_t x828; uint8_t x829 = _addcarryx_u32(x826, x775, x793, &x828);
+ { uint32_t x831; uint8_t x832 = _addcarryx_u32(x829, x778, x807, &x831);
+ { uint32_t x834; uint8_t x835 = _addcarryx_u32(x832, x781, x810, &x834);
+ { uint32_t x837; uint8_t x838 = _addcarryx_u32(x835, x784, x813, &x837);
+ { uint32_t x840; uint8_t x841 = _addcarryx_u32(x838, x787, x816, &x840);
+ { uint8_t x842 = (x841 + x788);
+ { uint32_t x844; uint8_t x845 = _subborrow_u32(0x0, x822, 0x1, &x844);
+ { uint32_t x847; uint8_t x848 = _subborrow_u32(x845, x825, 0x0, &x847);
+ { uint32_t x850; uint8_t x851 = _subborrow_u32(x848, x828, 0x0, &x850);
+ { uint32_t x853; uint8_t x854 = _subborrow_u32(x851, x831, 0xffffffff, &x853);
+ { uint32_t x856; uint8_t x857 = _subborrow_u32(x854, x834, 0xffffffff, &x856);
+ { uint32_t x859; uint8_t x860 = _subborrow_u32(x857, x837, 0xffffffff, &x859);
+ { uint32_t x862; uint8_t x863 = _subborrow_u32(x860, x840, 0xffffffff, &x862);
+ { uint32_t _; uint8_t x866 = _subborrow_u32(x863, x842, 0x0, &_);
+ { uint32_t x867 = cmovznz(x866, x862, x840);
+ { uint32_t x868 = cmovznz(x866, x859, x837);
+ { uint32_t x869 = cmovznz(x866, x856, x834);
+ { uint32_t x870 = cmovznz(x866, x853, x831);
+ { uint32_t x871 = cmovznz(x866, x850, x828);
+ { uint32_t x872 = cmovznz(x866, x847, x825);
+ { uint32_t x873 = cmovznz(x866, x844, x822);
+ out[0] = x873;
+ out[1] = x872;
+ out[2] = x871;
+ out[3] = x870;
+ out[4] = x869;
+ out[5] = x868;
+ out[6] = x867;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e224m2e96p1/fenz.c b/src/Specific/montgomery32_2e224m2e96p1/fenz.c
index a6516fa7a..387dcfe67 100644
--- a/src/Specific/montgomery32_2e224m2e96p1/fenz.c
+++ b/src/Specific/montgomery32_2e224m2e96p1/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x13 = (x12 | x11);
-{ uint32_t x14 = (x10 | x13);
-{ uint32_t x15 = (x8 | x14);
-{ uint32_t x16 = (x6 | x15);
-{ uint32_t x17 = (x4 | x16);
-{ uint32_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[7]) { /* fenz: stores in out[0] the bitwise OR of the seven 32-bit words of in1, so out[0] is 0 exactly when every word of in1 is 0. */
+ { const uint32_t x11 = in1[6]; /* NOTE(review): `ReturnType` in the signature is not defined in this hunk; confirm the including file defines it, otherwise this will not compile. */
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* all seven input words are now in locals */
+ { uint32_t x13 = (x12 | x11); /* running OR, starting with in1[5] | in1[6] */
+ { uint32_t x14 = (x10 | x13);
+ { uint32_t x15 = (x8 | x14);
+ { uint32_t x16 = (x6 | x15);
+ { uint32_t x17 = (x4 | x16);
+ { uint32_t x18 = (x2 | x17); /* x18 = OR of in1[0] through in1[6] */
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e226m5/feadd.c b/src/Specific/montgomery32_2e226m5/feadd.c
index f6c20643d..be4e3c5fe 100644
--- a/src/Specific/montgomery32_2e226m5/feadd.c
+++ b/src/Specific/montgomery32_2e226m5/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffffb, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* feadd: adds in1 and in2 as 8-word integers (index 0 is the least significant word), subtracts 2^226 - 5 from the sum, and uses the final borrow to choose per word between the subtracted and the unsubtracted sum. */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: word i of in1 + in2 plus the carry from word i-1, starting at word 0 with carry-in 0 */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word, i.e. a ninth word of the sum */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffffb, &x57); /* borrow chain: subtract the constant with words 0xfffffffb, 0xffffffff (six times), 0x3, which is 2^226 - 5 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* propagate the borrow through the carry word x55; the difference word `_` is discarded and only the final borrow x82 is kept */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* cmovznz is defined outside this hunk; presumably it yields its second argument when x82 is 0 and its third otherwise -- TODO confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 was selected from the word-0 values (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 was selected from the word-7 values (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e226m5/fenz.c b/src/Specific/montgomery32_2e226m5/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e226m5/fenz.c
+++ b/src/Specific/montgomery32_2e226m5/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* fenz: stores in out[0] the bitwise OR of the eight 32-bit words of in1, so out[0] is 0 exactly when every word of in1 is 0. */
+ { const uint32_t x13 = in1[7]; /* NOTE(review): `ReturnType` in the signature is not defined in this hunk; confirm the including file defines it, otherwise this will not compile. */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* all eight input words are now in locals */
+ { uint32_t x15 = (x14 | x13); /* running OR, starting with in1[6] | in1[7] */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 = OR of in1[0] through in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e230m27/feadd.c b/src/Specific/montgomery32_2e230m27/feadd.c
index 3cbde1b2f..03a448fac 100644
--- a/src/Specific/montgomery32_2e230m27/feadd.c
+++ b/src/Specific/montgomery32_2e230m27/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffe5, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3f, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* feadd: adds in1 and in2 as 8-word integers (index 0 is the least significant word), subtracts 2^230 - 27 from the sum, and uses the final borrow to choose per word between the subtracted and the unsubtracted sum. */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: word i of in1 + in2 plus the carry from word i-1, starting at word 0 with carry-in 0 */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word, i.e. a ninth word of the sum */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffe5, &x57); /* borrow chain: subtract the constant with words 0xffffffe5, 0xffffffff (six times), 0x3f, which is 2^230 - 27 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3f, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* propagate the borrow through the carry word x55; the difference word `_` is discarded and only the final borrow x82 is kept */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* cmovznz is defined outside this hunk; presumably it yields its second argument when x82 is 0 and its third otherwise -- TODO confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 was selected from the word-0 values (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 was selected from the word-7 values (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e230m27/fenz.c b/src/Specific/montgomery32_2e230m27/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e230m27/fenz.c
+++ b/src/Specific/montgomery32_2e230m27/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* fenz: stores in out[0] the bitwise OR of the eight 32-bit words of in1, so out[0] is 0 exactly when every word of in1 is 0. */
+ { const uint32_t x13 = in1[7]; /* NOTE(review): `ReturnType` in the signature is not defined in this hunk; confirm the including file defines it, otherwise this will not compile. */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* all eight input words are now in locals */
+ { uint32_t x15 = (x14 | x13); /* running OR, starting with in1[6] | in1[7] */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 = OR of in1[0] through in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e235m15/feadd.c b/src/Specific/montgomery32_2e235m15/feadd.c
index 892772269..2a86e1040 100644
--- a/src/Specific/montgomery32_2e235m15/feadd.c
+++ b/src/Specific/montgomery32_2e235m15/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff1, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff1, &x57);
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
+ { uint32_t x83 = cmovznz(x82, x78, x54);
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e235m15/fenz.c b/src/Specific/montgomery32_2e235m15/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e235m15/fenz.c
+++ b/src/Specific/montgomery32_2e235m15/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13);
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e243m9/feadd.c b/src/Specific/montgomery32_2e243m9/feadd.c
index e09d0a8d4..b06c2c424 100644
--- a/src/Specific/montgomery32_2e243m9/feadd.c
+++ b/src/Specific/montgomery32_2e243m9/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff7, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff7, &x57);
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
+ { uint32_t x83 = cmovznz(x82, x78, x54);
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e243m9/fenz.c b/src/Specific/montgomery32_2e243m9/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e243m9/fenz.c
+++ b/src/Specific/montgomery32_2e243m9/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13);
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e251m9/feadd.c b/src/Specific/montgomery32_2e251m9/feadd.c
index 808a39e88..6fc615365 100644
--- a/src/Specific/montgomery32_2e251m9/feadd.c
+++ b/src/Specific/montgomery32_2e251m9/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff7, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff7, &x57);
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
+ { uint32_t x83 = cmovznz(x82, x78, x54);
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e251m9/fenz.c b/src/Specific/montgomery32_2e251m9/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e251m9/fenz.c
+++ b/src/Specific/montgomery32_2e251m9/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13);
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c b/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c
index bb1f6172f..81ecde594 100644
--- a/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c
+++ b/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3f80ffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57);
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3f80ffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
+ { uint32_t x83 = cmovznz(x82, x78, x54);
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c b/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c
+++ b/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13);
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e255m19/feadd.c b/src/Specific/montgomery32_2e255m19/feadd.c
index e106e3cdc..e4f8afd79 100644
--- a/src/Specific/montgomery32_2e255m19/feadd.c
+++ b/src/Specific/montgomery32_2e255m19/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffed, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffed, &x57);
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
+ { uint32_t x83 = cmovznz(x82, x78, x54);
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e255m19/fenz.c b/src/Specific/montgomery32_2e255m19/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e255m19/fenz.c
+++ b/src/Specific/montgomery32_2e255m19/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* Nonzero test: stores the bitwise OR of all eight words of in1 into out[0], so out[0] == 0 exactly when every word of in1 is 0. NOTE(review): ReturnType is not defined in this hunk; presumably a macro supplied by whatever includes this fragment -- confirm. */
+ { const uint32_t x13 = in1[7]; /* the eight input words are read from index 7 down to index 0 */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13); /* OR chain starts with in1[6] | in1[7] and absorbs one lower-indexed word per line */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 == in1[0] | in1[1] | ... | in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}} /* closes the one-scope-per-binding nesting opened above */
+}
diff --git a/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c b/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c
index e106e3cdc..e4f8afd79 100644
--- a/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c
+++ b/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffed, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Adds two 8-word values (index 0 = least significant word) with carry propagation, then subtracts the fixed 8-word constant 2^255 - 19 with borrow propagation; each word of out is either the raw sum word or the subtracted word, selected by the final borrow x82. */
+ { const uint32_t x16 = in1[7]; /* words of in1, read from index 7 down to index 0 */
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7]; /* words of in2, same order */
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: in1[0] + in2[0] with carry-in 0; each later line adds the next word pair plus the previous carry */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffed, &x57); /* borrow chain: sum minus the constant, lowest word first; the eight immediates read as 32-bit words, least significant first, equal 2^255 - 19 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* extends the borrow chain through the carry x55; the difference word is discarded, only the final borrow x82 is used */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* per-word select between subtracted (x78) and unsubtracted (x54) word, controlled by x82. NOTE(review): cmovznz is defined outside this hunk; presumably it yields its 2nd argument when x82 == 0 and its 3rd otherwise -- confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 derives from word 0 (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 derives from word 7 (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c b/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c
+++ b/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* Nonzero test: stores the bitwise OR of all eight words of in1 into out[0], so out[0] == 0 exactly when every word of in1 is 0. NOTE(review): ReturnType is not defined in this hunk; presumably a macro supplied by whatever includes this fragment -- confirm. */
+ { const uint32_t x13 = in1[7]; /* the eight input words are read from index 7 down to index 0 */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13); /* OR chain starts with in1[6] | in1[7] and absorbs one lower-indexed word per line */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 == in1[0] | in1[1] | ... | in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}} /* closes the one-scope-per-binding nesting opened above */
+}
diff --git a/src/Specific/montgomery32_2e255m765/feadd.c b/src/Specific/montgomery32_2e255m765/feadd.c
index d2574eb59..44c42965c 100644
--- a/src/Specific/montgomery32_2e255m765/feadd.c
+++ b/src/Specific/montgomery32_2e255m765/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffd03, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Adds two 8-word values (index 0 = least significant word) with carry propagation, then subtracts the fixed 8-word constant 2^255 - 765 with borrow propagation; each word of out is either the raw sum word or the subtracted word, selected by the final borrow x82. */
+ { const uint32_t x16 = in1[7]; /* words of in1, read from index 7 down to index 0 */
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7]; /* words of in2, same order */
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: in1[0] + in2[0] with carry-in 0; each later line adds the next word pair plus the previous carry */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffd03, &x57); /* borrow chain: sum minus the constant, lowest word first; the eight immediates read as 32-bit words, least significant first, equal 2^255 - 765 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* extends the borrow chain through the carry x55; the difference word is discarded, only the final borrow x82 is used */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* per-word select between subtracted (x78) and unsubtracted (x54) word, controlled by x82. NOTE(review): cmovznz is defined outside this hunk; presumably it yields its 2nd argument when x82 == 0 and its 3rd otherwise -- confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 derives from word 0 (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 derives from word 7 (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e255m765/fenz.c b/src/Specific/montgomery32_2e255m765/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e255m765/fenz.c
+++ b/src/Specific/montgomery32_2e255m765/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* Nonzero test: stores the bitwise OR of all eight words of in1 into out[0], so out[0] == 0 exactly when every word of in1 is 0. NOTE(review): ReturnType is not defined in this hunk; presumably a macro supplied by whatever includes this fragment -- confirm. */
+ { const uint32_t x13 = in1[7]; /* the eight input words are read from index 7 down to index 0 */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13); /* OR chain starts with in1[6] | in1[7] and absorbs one lower-indexed word per line */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 == in1[0] | in1[1] | ... | in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}} /* closes the one-scope-per-binding nesting opened above */
+}
diff --git a/src/Specific/montgomery32_2e256m189/feadd.c b/src/Specific/montgomery32_2e256m189/feadd.c
index 552b1ff17..fa44e6837 100644
--- a/src/Specific/montgomery32_2e256m189/feadd.c
+++ b/src/Specific/montgomery32_2e256m189/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffff43, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Adds two 8-word values (index 0 = least significant word) with carry propagation, then subtracts the fixed 8-word constant 2^256 - 189 with borrow propagation; each word of out is either the raw sum word or the subtracted word, selected by the final borrow x82. */
+ { const uint32_t x16 = in1[7]; /* words of in1, read from index 7 down to index 0 */
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7]; /* words of in2, same order */
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: in1[0] + in2[0] with carry-in 0; each later line adds the next word pair plus the previous carry */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffff43, &x57); /* borrow chain: sum minus the constant, lowest word first; the eight immediates read as 32-bit words, least significant first, equal 2^256 - 189 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* extends the borrow chain through the carry x55; the difference word is discarded, only the final borrow x82 is used */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* per-word select between subtracted (x78) and unsubtracted (x54) word, controlled by x82. NOTE(review): cmovznz is defined outside this hunk; presumably it yields its 2nd argument when x82 == 0 and its 3rd otherwise -- confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 derives from word 0 (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 derives from word 7 (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e256m189/fenz.c b/src/Specific/montgomery32_2e256m189/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e256m189/fenz.c
+++ b/src/Specific/montgomery32_2e256m189/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* Nonzero test: stores the bitwise OR of all eight words of in1 into out[0], so out[0] == 0 exactly when every word of in1 is 0. NOTE(review): ReturnType is not defined in this hunk; presumably a macro supplied by whatever includes this fragment -- confirm. */
+ { const uint32_t x13 = in1[7]; /* the eight input words are read from index 7 down to index 0 */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13); /* OR chain starts with in1[6] | in1[7] and absorbs one lower-indexed word per line */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 == in1[0] | in1[1] | ... | in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}} /* closes the one-scope-per-binding nesting opened above */
+}
diff --git a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c
index 30a7671cb..77ab95b28 100644
--- a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c
+++ b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0x0, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0x0, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0x0, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0x1, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Adds two 8-word values (index 0 = least significant word) with carry propagation, then subtracts the fixed 8-word constant 2^256 - 2^224 + 2^192 + 2^96 - 1 with borrow propagation; each word of out is either the raw sum word or the subtracted word, selected by the final borrow x82. */
+ { const uint32_t x16 = in1[7]; /* words of in1, read from index 7 down to index 0 */
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7]; /* words of in2, same order */
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: in1[0] + in2[0] with carry-in 0; each later line adds the next word pair plus the previous carry */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57); /* borrow chain: sum minus the constant, lowest word first; the eight immediates read as 32-bit words, least significant first, equal 2^256 - 2^224 + 2^192 + 2^96 - 1 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0x0, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0x0, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0x0, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0x1, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* extends the borrow chain through the carry x55; the difference word is discarded, only the final borrow x82 is used */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* per-word select between subtracted (x78) and unsubtracted (x54) word, controlled by x82. NOTE(review): cmovznz is defined outside this hunk; presumably it yields its 2nd argument when x82 == 0 and its 3rd otherwise -- confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 derives from word 0 (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 derives from word 7 (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c
+++ b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) { /* Nonzero test: stores the bitwise OR of all eight words of in1 into out[0], so out[0] == 0 exactly when every word of in1 is 0. NOTE(review): ReturnType is not defined in this hunk; presumably a macro supplied by whatever includes this fragment -- confirm. */
+ { const uint32_t x13 = in1[7]; /* the eight input words are read from index 7 down to index 0 */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13); /* OR chain starts with in1[6] | in1[7] and absorbs one lower-indexed word per line */
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20); /* x21 == in1[0] | in1[1] | ... | in1[7] */
+ out[0] = x21;
+ }}}}}}}}}}}}}}} /* closes the one-scope-per-binding nesting opened above */
+}
diff --git a/src/Specific/montgomery32_2e256m2e32m977/feadd.c b/src/Specific/montgomery32_2e256m2e32m977/feadd.c
index 5741bb5c6..2fbefd03e 100644
--- a/src/Specific/montgomery32_2e256m2e32m977/feadd.c
+++ b/src/Specific/montgomery32_2e256m2e32m977/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffc2f, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xfffffffe, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Adds two 8-word values (index 0 = least significant word) with carry propagation, then subtracts the fixed 8-word constant 2^256 - 2^32 - 977 with borrow propagation; each word of out is either the raw sum word or the subtracted word, selected by the final borrow x82. */
+ { const uint32_t x16 = in1[7]; /* words of in1, read from index 7 down to index 0 */
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7]; /* words of in2, same order */
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); /* carry chain: in1[0] + in2[0] with carry-in 0; each later line adds the next word pair plus the previous carry */
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); /* x55 is the carry out of the top word */
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffc2f, &x57); /* borrow chain: sum minus the constant, lowest word first; the eight immediates read as 32-bit words, least significant first, equal 2^256 - 2^32 - 977 */
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xfffffffe, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); /* extends the borrow chain through the carry x55; the difference word is discarded, only the final borrow x82 is used */
+ { uint32_t x83 = cmovznz(x82, x78, x54); /* per-word select between subtracted (x78) and unsubtracted (x54) word, controlled by x82. NOTE(review): cmovznz is defined outside this hunk; presumably it yields its 2nd argument when x82 == 0 and its 3rd otherwise -- confirm */
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90; /* x90 derives from word 0 (x57 / x33), so out[] is written least significant word first */
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83; /* x83 derives from word 7 (x78 / x54) */
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e256m2e32m977/fenz.c b/src/Specific/montgomery32_2e256m2e32m977/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e256m2e32m977/fenz.c
+++ b/src/Specific/montgomery32_2e256m2e32m977/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13);
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c b/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c
index c1853e8e8..bdc9dd0dd 100644
--- a/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c
+++ b/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
-{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
-{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
-{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
-{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57);
-{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
-{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
-{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
-{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
-{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
-{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffa7ffff, &x78);
-{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
-{ uint32_t x83 = cmovznz(x82, x78, x54);
-{ uint32_t x84 = cmovznz(x82, x75, x51);
-{ uint32_t x85 = cmovznz(x82, x72, x48);
-{ uint32_t x86 = cmovznz(x82, x69, x45);
-{ uint32_t x87 = cmovznz(x82, x66, x42);
-{ uint32_t x88 = cmovznz(x82, x63, x39);
-{ uint32_t x89 = cmovznz(x82, x60, x36);
-{ uint32_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33);
+ { uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+ { uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+ { uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54);
+ { uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57);
+ { uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+ { uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+ { uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+ { uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+ { uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+ { uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffa7ffff, &x78);
+ { uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_);
+ { uint32_t x83 = cmovznz(x82, x78, x54);
+ { uint32_t x84 = cmovznz(x82, x75, x51);
+ { uint32_t x85 = cmovznz(x82, x72, x48);
+ { uint32_t x86 = cmovznz(x82, x69, x45);
+ { uint32_t x87 = cmovznz(x82, x66, x42);
+ { uint32_t x88 = cmovznz(x82, x63, x39);
+ { uint32_t x89 = cmovznz(x82, x60, x36);
+ { uint32_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c b/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c
index d35bfdd14..744f2aa5f 100644
--- a/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c
+++ b/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x15 = (x14 | x13);
-{ uint32_t x16 = (x12 | x15);
-{ uint32_t x17 = (x10 | x16);
-{ uint32_t x18 = (x8 | x17);
-{ uint32_t x19 = (x6 | x18);
-{ uint32_t x20 = (x4 | x19);
-{ uint32_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x15 = (x14 | x13);
+ { uint32_t x16 = (x12 | x15);
+ { uint32_t x17 = (x10 | x16);
+ { uint32_t x18 = (x8 | x17);
+ { uint32_t x19 = (x6 | x18);
+ { uint32_t x20 = (x4 | x19);
+ { uint32_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e266m3/feadd.c b/src/Specific/montgomery32_2e266m3/feadd.c
index 4ec364754..d3ccc7460 100644
--- a/src/Specific/montgomery32_2e266m3/feadd.c
+++ b/src/Specific/montgomery32_2e266m3/feadd.c
@@ -1,58 +1,58 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(0x0, x5, x21, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x7, x23, &x40);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(x41, x9, x25, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x11, x27, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x13, x29, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x15, x31, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x17, x33, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x19, x35, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x18, x34, &x61);
-{ uint32_t x64; uint8_t x65 = _subborrow_u32(0x0, x37, 0xfffffffd, &x64);
-{ uint32_t x67; uint8_t x68 = _subborrow_u32(x65, x40, 0xffffffff, &x67);
-{ uint32_t x70; uint8_t x71 = _subborrow_u32(x68, x43, 0xffffffff, &x70);
-{ uint32_t x73; uint8_t x74 = _subborrow_u32(x71, x46, 0xffffffff, &x73);
-{ uint32_t x76; uint8_t x77 = _subborrow_u32(x74, x49, 0xffffffff, &x76);
-{ uint32_t x79; uint8_t x80 = _subborrow_u32(x77, x52, 0xffffffff, &x79);
-{ uint32_t x82; uint8_t x83 = _subborrow_u32(x80, x55, 0xffffffff, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(x83, x58, 0xffffffff, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x61, 0x3ff, &x88);
-{ uint32_t _; uint8_t x92 = _subborrow_u32(x89, x62, 0x0, &_);
-{ uint32_t x93 = cmovznz(x92, x88, x61);
-{ uint32_t x94 = cmovznz(x92, x85, x58);
-{ uint32_t x95 = cmovznz(x92, x82, x55);
-{ uint32_t x96 = cmovznz(x92, x79, x52);
-{ uint32_t x97 = cmovznz(x92, x76, x49);
-{ uint32_t x98 = cmovznz(x92, x73, x46);
-{ uint32_t x99 = cmovznz(x92, x70, x43);
-{ uint32_t x100 = cmovznz(x92, x67, x40);
-{ uint32_t x101 = cmovznz(x92, x64, x37);
-out[0] = x93;
-out[1] = x94;
-out[2] = x95;
-out[3] = x96;
-out[4] = x97;
-out[5] = x98;
-out[6] = x99;
-out[7] = x100;
-out[8] = x101;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void feadd(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) {
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(0x0, x5, x21, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x7, x23, &x40);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(x41, x9, x25, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x11, x27, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x13, x29, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x15, x31, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x17, x33, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x19, x35, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x18, x34, &x61);
+ { uint32_t x64; uint8_t x65 = _subborrow_u32(0x0, x37, 0xfffffffd, &x64);
+ { uint32_t x67; uint8_t x68 = _subborrow_u32(x65, x40, 0xffffffff, &x67);
+ { uint32_t x70; uint8_t x71 = _subborrow_u32(x68, x43, 0xffffffff, &x70);
+ { uint32_t x73; uint8_t x74 = _subborrow_u32(x71, x46, 0xffffffff, &x73);
+ { uint32_t x76; uint8_t x77 = _subborrow_u32(x74, x49, 0xffffffff, &x76);
+ { uint32_t x79; uint8_t x80 = _subborrow_u32(x77, x52, 0xffffffff, &x79);
+ { uint32_t x82; uint8_t x83 = _subborrow_u32(x80, x55, 0xffffffff, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(x83, x58, 0xffffffff, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x61, 0x3ff, &x88);
+ { uint32_t _; uint8_t x92 = _subborrow_u32(x89, x62, 0x0, &_);
+ { uint32_t x93 = cmovznz(x92, x88, x61);
+ { uint32_t x94 = cmovznz(x92, x85, x58);
+ { uint32_t x95 = cmovznz(x92, x82, x55);
+ { uint32_t x96 = cmovznz(x92, x79, x52);
+ { uint32_t x97 = cmovznz(x92, x76, x49);
+ { uint32_t x98 = cmovznz(x92, x73, x46);
+ { uint32_t x99 = cmovznz(x92, x70, x43);
+ { uint32_t x100 = cmovznz(x92, x67, x40);
+ { uint32_t x101 = cmovznz(x92, x64, x37);
+ out[0] = x101;
+ out[1] = x100;
+ out[2] = x99;
+ out[3] = x98;
+ out[4] = x97;
+ out[5] = x96;
+ out[6] = x95;
+ out[7] = x94;
+ out[8] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e266m3/fenz.c b/src/Specific/montgomery32_2e266m3/fenz.c
index 2566ebcbc..c10600790 100644
--- a/src/Specific/montgomery32_2e266m3/fenz.c
+++ b/src/Specific/montgomery32_2e266m3/fenz.c
@@ -1,30 +1,21 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x17 = (x16 | x15);
-{ uint32_t x18 = (x14 | x17);
-{ uint32_t x19 = (x12 | x18);
-{ uint32_t x20 = (x10 | x19);
-{ uint32_t x21 = (x8 | x20);
-{ uint32_t x22 = (x6 | x21);
-{ uint32_t x23 = (x4 | x22);
-{ uint32_t x24 = (x2 | x23);
-out[0] = x24;
-}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x17 = (x16 | x15);
+ { uint32_t x18 = (x14 | x17);
+ { uint32_t x19 = (x12 | x18);
+ { uint32_t x20 = (x10 | x19);
+ { uint32_t x21 = (x8 | x20);
+ { uint32_t x22 = (x6 | x21);
+ { uint32_t x23 = (x4 | x22);
+ { uint32_t x24 = (x2 | x23);
+ out[0] = x24;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e285m9/feadd.c b/src/Specific/montgomery32_2e285m9/feadd.c
index ce65d9fa5..e22f39262 100644
--- a/src/Specific/montgomery32_2e285m9/feadd.c
+++ b/src/Specific/montgomery32_2e285m9/feadd.c
@@ -1,58 +1,58 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint32_t x37; uint8_t x38 = _addcarryx_u32(0x0, x5, x21, &x37);
-{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x7, x23, &x40);
-{ uint32_t x43; uint8_t x44 = _addcarryx_u32(x41, x9, x25, &x43);
-{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x11, x27, &x46);
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x13, x29, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x15, x31, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x17, x33, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x19, x35, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x18, x34, &x61);
-{ uint32_t x64; uint8_t x65 = _subborrow_u32(0x0, x37, 0xfffffff7, &x64);
-{ uint32_t x67; uint8_t x68 = _subborrow_u32(x65, x40, 0xffffffff, &x67);
-{ uint32_t x70; uint8_t x71 = _subborrow_u32(x68, x43, 0xffffffff, &x70);
-{ uint32_t x73; uint8_t x74 = _subborrow_u32(x71, x46, 0xffffffff, &x73);
-{ uint32_t x76; uint8_t x77 = _subborrow_u32(x74, x49, 0xffffffff, &x76);
-{ uint32_t x79; uint8_t x80 = _subborrow_u32(x77, x52, 0xffffffff, &x79);
-{ uint32_t x82; uint8_t x83 = _subborrow_u32(x80, x55, 0xffffffff, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(x83, x58, 0xffffffff, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x61, 0x1fffffff, &x88);
-{ uint32_t _; uint8_t x92 = _subborrow_u32(x89, x62, 0x0, &_);
-{ uint32_t x93 = cmovznz(x92, x88, x61);
-{ uint32_t x94 = cmovznz(x92, x85, x58);
-{ uint32_t x95 = cmovznz(x92, x82, x55);
-{ uint32_t x96 = cmovznz(x92, x79, x52);
-{ uint32_t x97 = cmovznz(x92, x76, x49);
-{ uint32_t x98 = cmovznz(x92, x73, x46);
-{ uint32_t x99 = cmovznz(x92, x70, x43);
-{ uint32_t x100 = cmovznz(x92, x67, x40);
-{ uint32_t x101 = cmovznz(x92, x64, x37);
-out[0] = x93;
-out[1] = x94;
-out[2] = x95;
-out[3] = x96;
-out[4] = x97;
-out[5] = x98;
-out[6] = x99;
-out[7] = x100;
-out[8] = x101;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void feadd(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) {
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint32_t x37; uint8_t x38 = _addcarryx_u32(0x0, x5, x21, &x37);
+ { uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x7, x23, &x40);
+ { uint32_t x43; uint8_t x44 = _addcarryx_u32(x41, x9, x25, &x43);
+ { uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x11, x27, &x46);
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x13, x29, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x15, x31, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x17, x33, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x19, x35, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x18, x34, &x61);
+ { uint32_t x64; uint8_t x65 = _subborrow_u32(0x0, x37, 0xfffffff7, &x64);
+ { uint32_t x67; uint8_t x68 = _subborrow_u32(x65, x40, 0xffffffff, &x67);
+ { uint32_t x70; uint8_t x71 = _subborrow_u32(x68, x43, 0xffffffff, &x70);
+ { uint32_t x73; uint8_t x74 = _subborrow_u32(x71, x46, 0xffffffff, &x73);
+ { uint32_t x76; uint8_t x77 = _subborrow_u32(x74, x49, 0xffffffff, &x76);
+ { uint32_t x79; uint8_t x80 = _subborrow_u32(x77, x52, 0xffffffff, &x79);
+ { uint32_t x82; uint8_t x83 = _subborrow_u32(x80, x55, 0xffffffff, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(x83, x58, 0xffffffff, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x61, 0x1fffffff, &x88);
+ { uint32_t _; uint8_t x92 = _subborrow_u32(x89, x62, 0x0, &_);
+ { uint32_t x93 = cmovznz(x92, x88, x61);
+ { uint32_t x94 = cmovznz(x92, x85, x58);
+ { uint32_t x95 = cmovznz(x92, x82, x55);
+ { uint32_t x96 = cmovznz(x92, x79, x52);
+ { uint32_t x97 = cmovznz(x92, x76, x49);
+ { uint32_t x98 = cmovznz(x92, x73, x46);
+ { uint32_t x99 = cmovznz(x92, x70, x43);
+ { uint32_t x100 = cmovznz(x92, x67, x40);
+ { uint32_t x101 = cmovznz(x92, x64, x37);
+ out[0] = x101;
+ out[1] = x100;
+ out[2] = x99;
+ out[3] = x98;
+ out[4] = x97;
+ out[5] = x96;
+ out[6] = x95;
+ out[7] = x94;
+ out[8] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e285m9/fenz.c b/src/Specific/montgomery32_2e285m9/fenz.c
index 2566ebcbc..c10600790 100644
--- a/src/Specific/montgomery32_2e285m9/fenz.c
+++ b/src/Specific/montgomery32_2e285m9/fenz.c
@@ -1,30 +1,21 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x17 = (x16 | x15);
-{ uint32_t x18 = (x14 | x17);
-{ uint32_t x19 = (x12 | x18);
-{ uint32_t x20 = (x10 | x19);
-{ uint32_t x21 = (x8 | x20);
-{ uint32_t x22 = (x6 | x21);
-{ uint32_t x23 = (x4 | x22);
-{ uint32_t x24 = (x2 | x23);
-out[0] = x24;
-}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x17 = (x16 | x15);
+ { uint32_t x18 = (x14 | x17);
+ { uint32_t x19 = (x12 | x18);
+ { uint32_t x20 = (x10 | x19);
+ { uint32_t x21 = (x8 | x20);
+ { uint32_t x22 = (x6 | x21);
+ { uint32_t x23 = (x4 | x22);
+ { uint32_t x24 = (x2 | x23);
+ out[0] = x24;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e291m19/feadd.c b/src/Specific/montgomery32_2e291m19/feadd.c
index a589c0281..d8b4e10e6 100644
--- a/src/Specific/montgomery32_2e291m19/feadd.c
+++ b/src/Specific/montgomery32_2e291m19/feadd.c
@@ -1,62 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint32_t x41; uint8_t x42 = _addcarryx_u32(0x0, x5, x23, &x41);
-{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x7, x25, &x44);
-{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x9, x27, &x47);
-{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x48, x11, x29, &x50);
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x51, x13, x31, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x15, x33, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x17, x35, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x19, x37, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x21, x39, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x20, x38, &x68);
-{ uint32_t x71; uint8_t x72 = _subborrow_u32(0x0, x41, 0xffffffed, &x71);
-{ uint32_t x74; uint8_t x75 = _subborrow_u32(x72, x44, 0xffffffff, &x74);
-{ uint32_t x77; uint8_t x78 = _subborrow_u32(x75, x47, 0xffffffff, &x77);
-{ uint32_t x80; uint8_t x81 = _subborrow_u32(x78, x50, 0xffffffff, &x80);
-{ uint32_t x83; uint8_t x84 = _subborrow_u32(x81, x53, 0xffffffff, &x83);
-{ uint32_t x86; uint8_t x87 = _subborrow_u32(x84, x56, 0xffffffff, &x86);
-{ uint32_t x89; uint8_t x90 = _subborrow_u32(x87, x59, 0xffffffff, &x89);
-{ uint32_t x92; uint8_t x93 = _subborrow_u32(x90, x62, 0xffffffff, &x92);
-{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x65, 0xffffffff, &x95);
-{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x68, 0x7, &x98);
-{ uint32_t _; uint8_t x102 = _subborrow_u32(x99, x69, 0x0, &_);
-{ uint32_t x103 = cmovznz(x102, x98, x68);
-{ uint32_t x104 = cmovznz(x102, x95, x65);
-{ uint32_t x105 = cmovznz(x102, x92, x62);
-{ uint32_t x106 = cmovznz(x102, x89, x59);
-{ uint32_t x107 = cmovznz(x102, x86, x56);
-{ uint32_t x108 = cmovznz(x102, x83, x53);
-{ uint32_t x109 = cmovznz(x102, x80, x50);
-{ uint32_t x110 = cmovznz(x102, x77, x47);
-{ uint32_t x111 = cmovznz(x102, x74, x44);
-{ uint32_t x112 = cmovznz(x102, x71, x41);
-out[0] = x103;
-out[1] = x104;
-out[2] = x105;
-out[3] = x106;
-out[4] = x107;
-out[5] = x108;
-out[6] = x109;
-out[7] = x110;
-out[8] = x111;
-out[9] = x112;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void feadd(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9];
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint32_t x41; uint8_t x42 = _addcarryx_u32(0x0, x5, x23, &x41);
+ { uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x7, x25, &x44);
+ { uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x9, x27, &x47);
+ { uint32_t x50; uint8_t x51 = _addcarryx_u32(x48, x11, x29, &x50);
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(x51, x13, x31, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x15, x33, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x17, x35, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x19, x37, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x21, x39, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x20, x38, &x68);
+ { uint32_t x71; uint8_t x72 = _subborrow_u32(0x0, x41, 0xffffffed, &x71);
+ { uint32_t x74; uint8_t x75 = _subborrow_u32(x72, x44, 0xffffffff, &x74);
+ { uint32_t x77; uint8_t x78 = _subborrow_u32(x75, x47, 0xffffffff, &x77);
+ { uint32_t x80; uint8_t x81 = _subborrow_u32(x78, x50, 0xffffffff, &x80);
+ { uint32_t x83; uint8_t x84 = _subborrow_u32(x81, x53, 0xffffffff, &x83);
+ { uint32_t x86; uint8_t x87 = _subborrow_u32(x84, x56, 0xffffffff, &x86);
+ { uint32_t x89; uint8_t x90 = _subborrow_u32(x87, x59, 0xffffffff, &x89);
+ { uint32_t x92; uint8_t x93 = _subborrow_u32(x90, x62, 0xffffffff, &x92);
+ { uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x65, 0xffffffff, &x95);
+ { uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x68, 0x7, &x98);
+ { uint32_t _; uint8_t x102 = _subborrow_u32(x99, x69, 0x0, &_);
+ { uint32_t x103 = cmovznz(x102, x98, x68);
+ { uint32_t x104 = cmovznz(x102, x95, x65);
+ { uint32_t x105 = cmovznz(x102, x92, x62);
+ { uint32_t x106 = cmovznz(x102, x89, x59);
+ { uint32_t x107 = cmovznz(x102, x86, x56);
+ { uint32_t x108 = cmovznz(x102, x83, x53);
+ { uint32_t x109 = cmovznz(x102, x80, x50);
+ { uint32_t x110 = cmovznz(x102, x77, x47);
+ { uint32_t x111 = cmovznz(x102, x74, x44);
+ { uint32_t x112 = cmovznz(x102, x71, x41);
+ out[0] = x112;
+ out[1] = x111;
+ out[2] = x110;
+ out[3] = x109;
+ out[4] = x108;
+ out[5] = x107;
+ out[6] = x106;
+ out[7] = x105;
+ out[8] = x104;
+ out[9] = x103;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e291m19/fenz.c b/src/Specific/montgomery32_2e291m19/fenz.c
index cca29049c..4ad084ead 100644
--- a/src/Specific/montgomery32_2e291m19/fenz.c
+++ b/src/Specific/montgomery32_2e291m19/fenz.c
@@ -1,31 +1,23 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x19 = (x18 | x17);
-{ uint32_t x20 = (x16 | x19);
-{ uint32_t x21 = (x14 | x20);
-{ uint32_t x22 = (x12 | x21);
-{ uint32_t x23 = (x10 | x22);
-{ uint32_t x24 = (x8 | x23);
-{ uint32_t x25 = (x6 | x24);
-{ uint32_t x26 = (x4 | x25);
-{ uint32_t x27 = (x2 | x26);
-out[0] = x27;
-}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x19 = (x18 | x17);
+ { uint32_t x20 = (x16 | x19);
+ { uint32_t x21 = (x14 | x20);
+ { uint32_t x22 = (x12 | x21);
+ { uint32_t x23 = (x10 | x22);
+ { uint32_t x24 = (x8 | x23);
+ { uint32_t x25 = (x6 | x24);
+ { uint32_t x26 = (x4 | x25);
+ { uint32_t x27 = (x2 | x26);
+ out[0] = x27;
+ }}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e321m9/feadd.c b/src/Specific/montgomery32_2e321m9/feadd.c
index 665eaaa05..3ff7adea9 100644
--- a/src/Specific/montgomery32_2e321m9/feadd.c
+++ b/src/Specific/montgomery32_2e321m9/feadd.c
@@ -1,66 +1,70 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffff7, &x78);
-{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
-{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
-{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
-{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
-{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
-{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x1, &x108);
-{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
-{ uint32_t x113 = cmovznz(x112, x108, x75);
-{ uint32_t x114 = cmovznz(x112, x105, x72);
-{ uint32_t x115 = cmovznz(x112, x102, x69);
-{ uint32_t x116 = cmovznz(x112, x99, x66);
-{ uint32_t x117 = cmovznz(x112, x96, x63);
-{ uint32_t x118 = cmovznz(x112, x93, x60);
-{ uint32_t x119 = cmovznz(x112, x90, x57);
-{ uint32_t x120 = cmovznz(x112, x87, x54);
-{ uint32_t x121 = cmovznz(x112, x84, x51);
-{ uint32_t x122 = cmovznz(x112, x81, x48);
-{ uint32_t x123 = cmovznz(x112, x78, x45);
-out[0] = x113;
-out[1] = x114;
-out[2] = x115;
-out[3] = x116;
-out[4] = x117;
-out[5] = x118;
-out[6] = x119;
-out[7] = x120;
-out[8] = x121;
-out[9] = x122;
-out[10] = x123;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void feadd(uint32_t out[11], const uint32_t in1[11], const uint32_t in2[11]) {
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x42 = in2[10];
+ { const uint32_t x43 = in2[9];
+ { const uint32_t x41 = in2[8];
+ { const uint32_t x39 = in2[7];
+ { const uint32_t x37 = in2[6];
+ { const uint32_t x35 = in2[5];
+ { const uint32_t x33 = in2[4];
+ { const uint32_t x31 = in2[3];
+ { const uint32_t x29 = in2[2];
+ { const uint32_t x27 = in2[1];
+ { const uint32_t x25 = in2[0];
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffff7, &x78);
+ { uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+ { uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+ { uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+ { uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+ { uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+ { uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x1, &x108);
+ { uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
+ { uint32_t x113 = cmovznz(x112, x108, x75);
+ { uint32_t x114 = cmovznz(x112, x105, x72);
+ { uint32_t x115 = cmovznz(x112, x102, x69);
+ { uint32_t x116 = cmovznz(x112, x99, x66);
+ { uint32_t x117 = cmovznz(x112, x96, x63);
+ { uint32_t x118 = cmovznz(x112, x93, x60);
+ { uint32_t x119 = cmovznz(x112, x90, x57);
+ { uint32_t x120 = cmovznz(x112, x87, x54);
+ { uint32_t x121 = cmovznz(x112, x84, x51);
+ { uint32_t x122 = cmovznz(x112, x81, x48);
+ { uint32_t x123 = cmovznz(x112, x78, x45);
+ out[0] = x123;
+ out[1] = x122;
+ out[2] = x121;
+ out[3] = x120;
+ out[4] = x119;
+ out[5] = x118;
+ out[6] = x117;
+ out[7] = x116;
+ out[8] = x115;
+ out[9] = x114;
+ out[10] = x113;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e321m9/fenz.c b/src/Specific/montgomery32_2e321m9/fenz.c
index 9e77ab0f6..d962eda51 100644
--- a/src/Specific/montgomery32_2e321m9/fenz.c
+++ b/src/Specific/montgomery32_2e321m9/fenz.c
@@ -1,32 +1,25 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x21 = (x20 | x19);
-{ uint32_t x22 = (x18 | x21);
-{ uint32_t x23 = (x16 | x22);
-{ uint32_t x24 = (x14 | x23);
-{ uint32_t x25 = (x12 | x24);
-{ uint32_t x26 = (x10 | x25);
-{ uint32_t x27 = (x8 | x26);
-{ uint32_t x28 = (x6 | x27);
-{ uint32_t x29 = (x4 | x28);
-{ uint32_t x30 = (x2 | x29);
-out[0] = x30;
-}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[11]) {
+ { const uint32_t x19 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x21 = (x20 | x19);
+ { uint32_t x22 = (x18 | x21);
+ { uint32_t x23 = (x16 | x22);
+ { uint32_t x24 = (x14 | x23);
+ { uint32_t x25 = (x12 | x24);
+ { uint32_t x26 = (x10 | x25);
+ { uint32_t x27 = (x8 | x26);
+ { uint32_t x28 = (x6 | x27);
+ { uint32_t x29 = (x4 | x28);
+ { uint32_t x30 = (x2 | x29);
+ out[0] = x30;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e322m2e161m1/feadd.c b/src/Specific/montgomery32_2e322m2e161m1/feadd.c
index ab2d20442..13bacf203 100644
--- a/src/Specific/montgomery32_2e322m2e161m1/feadd.c
+++ b/src/Specific/montgomery32_2e322m2e161m1/feadd.c
@@ -1,66 +1,70 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xffffffff, &x78);
-{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
-{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
-{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
-{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
-{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xfffffffd, &x93);
-{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x3, &x108);
-{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
-{ uint32_t x113 = cmovznz(x112, x108, x75);
-{ uint32_t x114 = cmovznz(x112, x105, x72);
-{ uint32_t x115 = cmovznz(x112, x102, x69);
-{ uint32_t x116 = cmovznz(x112, x99, x66);
-{ uint32_t x117 = cmovznz(x112, x96, x63);
-{ uint32_t x118 = cmovznz(x112, x93, x60);
-{ uint32_t x119 = cmovznz(x112, x90, x57);
-{ uint32_t x120 = cmovznz(x112, x87, x54);
-{ uint32_t x121 = cmovznz(x112, x84, x51);
-{ uint32_t x122 = cmovznz(x112, x81, x48);
-{ uint32_t x123 = cmovznz(x112, x78, x45);
-out[0] = x113;
-out[1] = x114;
-out[2] = x115;
-out[3] = x116;
-out[4] = x117;
-out[5] = x118;
-out[6] = x119;
-out[7] = x120;
-out[8] = x121;
-out[9] = x122;
-out[10] = x123;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void feadd(uint32_t out[11], const uint32_t in1[11], const uint32_t in2[11]) {
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x42 = in2[10];
+ { const uint32_t x43 = in2[9];
+ { const uint32_t x41 = in2[8];
+ { const uint32_t x39 = in2[7];
+ { const uint32_t x37 = in2[6];
+ { const uint32_t x35 = in2[5];
+ { const uint32_t x33 = in2[4];
+ { const uint32_t x31 = in2[3];
+ { const uint32_t x29 = in2[2];
+ { const uint32_t x27 = in2[1];
+ { const uint32_t x25 = in2[0];
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xffffffff, &x78);
+ { uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+ { uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+ { uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+ { uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+ { uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xfffffffd, &x93);
+ { uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x3, &x108);
+ { uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
+ { uint32_t x113 = cmovznz(x112, x108, x75);
+ { uint32_t x114 = cmovznz(x112, x105, x72);
+ { uint32_t x115 = cmovznz(x112, x102, x69);
+ { uint32_t x116 = cmovznz(x112, x99, x66);
+ { uint32_t x117 = cmovznz(x112, x96, x63);
+ { uint32_t x118 = cmovznz(x112, x93, x60);
+ { uint32_t x119 = cmovznz(x112, x90, x57);
+ { uint32_t x120 = cmovznz(x112, x87, x54);
+ { uint32_t x121 = cmovznz(x112, x84, x51);
+ { uint32_t x122 = cmovznz(x112, x81, x48);
+ { uint32_t x123 = cmovznz(x112, x78, x45);
+ out[0] = x123;
+ out[1] = x122;
+ out[2] = x121;
+ out[3] = x120;
+ out[4] = x119;
+ out[5] = x118;
+ out[6] = x117;
+ out[7] = x116;
+ out[8] = x115;
+ out[9] = x114;
+ out[10] = x113;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e322m2e161m1/fenz.c b/src/Specific/montgomery32_2e322m2e161m1/fenz.c
index 9e77ab0f6..d962eda51 100644
--- a/src/Specific/montgomery32_2e322m2e161m1/fenz.c
+++ b/src/Specific/montgomery32_2e322m2e161m1/fenz.c
@@ -1,32 +1,25 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x21 = (x20 | x19);
-{ uint32_t x22 = (x18 | x21);
-{ uint32_t x23 = (x16 | x22);
-{ uint32_t x24 = (x14 | x23);
-{ uint32_t x25 = (x12 | x24);
-{ uint32_t x26 = (x10 | x25);
-{ uint32_t x27 = (x8 | x26);
-{ uint32_t x28 = (x6 | x27);
-{ uint32_t x29 = (x4 | x28);
-{ uint32_t x30 = (x2 | x29);
-out[0] = x30;
-}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[11]) {
+ { const uint32_t x19 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x21 = (x20 | x19);
+ { uint32_t x22 = (x18 | x21);
+ { uint32_t x23 = (x16 | x22);
+ { uint32_t x24 = (x14 | x23);
+ { uint32_t x25 = (x12 | x24);
+ { uint32_t x26 = (x10 | x25);
+ { uint32_t x27 = (x8 | x26);
+ { uint32_t x28 = (x6 | x27);
+ { uint32_t x29 = (x4 | x28);
+ { uint32_t x30 = (x2 | x29);
+ out[0] = x30;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e336m17/feadd.c b/src/Specific/montgomery32_2e336m17/feadd.c
index 98051c22b..73f8dedff 100644
--- a/src/Specific/montgomery32_2e336m17/feadd.c
+++ b/src/Specific/montgomery32_2e336m17/feadd.c
@@ -1,66 +1,70 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xffffffef, &x78);
-{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
-{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
-{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
-{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
-{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
-{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0xffff, &x108);
-{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
-{ uint32_t x113 = cmovznz(x112, x108, x75);
-{ uint32_t x114 = cmovznz(x112, x105, x72);
-{ uint32_t x115 = cmovznz(x112, x102, x69);
-{ uint32_t x116 = cmovznz(x112, x99, x66);
-{ uint32_t x117 = cmovznz(x112, x96, x63);
-{ uint32_t x118 = cmovznz(x112, x93, x60);
-{ uint32_t x119 = cmovznz(x112, x90, x57);
-{ uint32_t x120 = cmovznz(x112, x87, x54);
-{ uint32_t x121 = cmovznz(x112, x84, x51);
-{ uint32_t x122 = cmovznz(x112, x81, x48);
-{ uint32_t x123 = cmovznz(x112, x78, x45);
-out[0] = x113;
-out[1] = x114;
-out[2] = x115;
-out[3] = x116;
-out[4] = x117;
-out[5] = x118;
-out[6] = x119;
-out[7] = x120;
-out[8] = x121;
-out[9] = x122;
-out[10] = x123;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void feadd(uint32_t out[11], const uint32_t in1[11], const uint32_t in2[11]) {
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x42 = in2[10];
+ { const uint32_t x43 = in2[9];
+ { const uint32_t x41 = in2[8];
+ { const uint32_t x39 = in2[7];
+ { const uint32_t x37 = in2[6];
+ { const uint32_t x35 = in2[5];
+ { const uint32_t x33 = in2[4];
+ { const uint32_t x31 = in2[3];
+ { const uint32_t x29 = in2[2];
+ { const uint32_t x27 = in2[1];
+ { const uint32_t x25 = in2[0];
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xffffffef, &x78);
+ { uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+ { uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+ { uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+ { uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+ { uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+ { uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0xffff, &x108);
+ { uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
+ { uint32_t x113 = cmovznz(x112, x108, x75);
+ { uint32_t x114 = cmovznz(x112, x105, x72);
+ { uint32_t x115 = cmovznz(x112, x102, x69);
+ { uint32_t x116 = cmovznz(x112, x99, x66);
+ { uint32_t x117 = cmovznz(x112, x96, x63);
+ { uint32_t x118 = cmovznz(x112, x93, x60);
+ { uint32_t x119 = cmovznz(x112, x90, x57);
+ { uint32_t x120 = cmovznz(x112, x87, x54);
+ { uint32_t x121 = cmovznz(x112, x84, x51);
+ { uint32_t x122 = cmovznz(x112, x81, x48);
+ { uint32_t x123 = cmovznz(x112, x78, x45);
+ out[0] = x123;
+ out[1] = x122;
+ out[2] = x121;
+ out[3] = x120;
+ out[4] = x119;
+ out[5] = x118;
+ out[6] = x117;
+ out[7] = x116;
+ out[8] = x115;
+ out[9] = x114;
+ out[10] = x113;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e336m17/fenz.c b/src/Specific/montgomery32_2e336m17/fenz.c
index 9e77ab0f6..d962eda51 100644
--- a/src/Specific/montgomery32_2e336m17/fenz.c
+++ b/src/Specific/montgomery32_2e336m17/fenz.c
@@ -1,32 +1,25 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x21 = (x20 | x19);
-{ uint32_t x22 = (x18 | x21);
-{ uint32_t x23 = (x16 | x22);
-{ uint32_t x24 = (x14 | x23);
-{ uint32_t x25 = (x12 | x24);
-{ uint32_t x26 = (x10 | x25);
-{ uint32_t x27 = (x8 | x26);
-{ uint32_t x28 = (x6 | x27);
-{ uint32_t x29 = (x4 | x28);
-{ uint32_t x30 = (x2 | x29);
-out[0] = x30;
-}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[11]) {
+ { const uint32_t x19 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x21 = (x20 | x19);
+ { uint32_t x22 = (x18 | x21);
+ { uint32_t x23 = (x16 | x22);
+ { uint32_t x24 = (x14 | x23);
+ { uint32_t x25 = (x12 | x24);
+ { uint32_t x26 = (x10 | x25);
+ { uint32_t x27 = (x8 | x26);
+ { uint32_t x28 = (x6 | x27);
+ { uint32_t x29 = (x4 | x28);
+ { uint32_t x30 = (x2 | x29);
+ out[0] = x30;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e336m3/feadd.c b/src/Specific/montgomery32_2e336m3/feadd.c
index 2b9e8f492..97458a3b9 100644
--- a/src/Specific/montgomery32_2e336m3/feadd.c
+++ b/src/Specific/montgomery32_2e336m3/feadd.c
@@ -1,66 +1,70 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffffd, &x78);
-{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
-{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
-{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
-{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
-{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
-{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0xffff, &x108);
-{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
-{ uint32_t x113 = cmovznz(x112, x108, x75);
-{ uint32_t x114 = cmovznz(x112, x105, x72);
-{ uint32_t x115 = cmovznz(x112, x102, x69);
-{ uint32_t x116 = cmovznz(x112, x99, x66);
-{ uint32_t x117 = cmovznz(x112, x96, x63);
-{ uint32_t x118 = cmovznz(x112, x93, x60);
-{ uint32_t x119 = cmovznz(x112, x90, x57);
-{ uint32_t x120 = cmovznz(x112, x87, x54);
-{ uint32_t x121 = cmovznz(x112, x84, x51);
-{ uint32_t x122 = cmovznz(x112, x81, x48);
-{ uint32_t x123 = cmovznz(x112, x78, x45);
-out[0] = x113;
-out[1] = x114;
-out[2] = x115;
-out[3] = x116;
-out[4] = x117;
-out[5] = x118;
-out[6] = x119;
-out[7] = x120;
-out[8] = x121;
-out[9] = x122;
-out[10] = x123;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void feadd(uint32_t out[11], const uint32_t in1[11], const uint32_t in2[11]) {
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x42 = in2[10];
+ { const uint32_t x43 = in2[9];
+ { const uint32_t x41 = in2[8];
+ { const uint32_t x39 = in2[7];
+ { const uint32_t x37 = in2[6];
+ { const uint32_t x35 = in2[5];
+ { const uint32_t x33 = in2[4];
+ { const uint32_t x31 = in2[3];
+ { const uint32_t x29 = in2[2];
+ { const uint32_t x27 = in2[1];
+ { const uint32_t x25 = in2[0];
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffffd, &x78);
+ { uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+ { uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+ { uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+ { uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+ { uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+ { uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0xffff, &x108);
+ { uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
+ { uint32_t x113 = cmovznz(x112, x108, x75);
+ { uint32_t x114 = cmovznz(x112, x105, x72);
+ { uint32_t x115 = cmovznz(x112, x102, x69);
+ { uint32_t x116 = cmovznz(x112, x99, x66);
+ { uint32_t x117 = cmovznz(x112, x96, x63);
+ { uint32_t x118 = cmovznz(x112, x93, x60);
+ { uint32_t x119 = cmovznz(x112, x90, x57);
+ { uint32_t x120 = cmovznz(x112, x87, x54);
+ { uint32_t x121 = cmovznz(x112, x84, x51);
+ { uint32_t x122 = cmovznz(x112, x81, x48);
+ { uint32_t x123 = cmovznz(x112, x78, x45);
+ out[0] = x123;
+ out[1] = x122;
+ out[2] = x121;
+ out[3] = x120;
+ out[4] = x119;
+ out[5] = x118;
+ out[6] = x117;
+ out[7] = x116;
+ out[8] = x115;
+ out[9] = x114;
+ out[10] = x113;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e336m3/fenz.c b/src/Specific/montgomery32_2e336m3/fenz.c
index 9e77ab0f6..d962eda51 100644
--- a/src/Specific/montgomery32_2e336m3/fenz.c
+++ b/src/Specific/montgomery32_2e336m3/fenz.c
@@ -1,32 +1,25 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x21 = (x20 | x19);
-{ uint32_t x22 = (x18 | x21);
-{ uint32_t x23 = (x16 | x22);
-{ uint32_t x24 = (x14 | x23);
-{ uint32_t x25 = (x12 | x24);
-{ uint32_t x26 = (x10 | x25);
-{ uint32_t x27 = (x8 | x26);
-{ uint32_t x28 = (x6 | x27);
-{ uint32_t x29 = (x4 | x28);
-{ uint32_t x30 = (x2 | x29);
-out[0] = x30;
-}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[11]) { /* Non-zero test: writes the bitwise OR of all 11 words of in1 to out[0], so out[0] is 0 exactly when every input word is 0. NOTE(review): ReturnType is not defined in the visible lines -- presumably a macro or a generator placeholder; confirm it expands to something valid. */
+ { const uint32_t x19 = in1[10]; /* words are loaded from the highest index down to in1[0] */
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x21 = (x20 | x19); /* OR chain: each following step folds one more word into the running value */
+ { uint32_t x22 = (x18 | x21);
+ { uint32_t x23 = (x16 | x22);
+ { uint32_t x24 = (x14 | x23);
+ { uint32_t x25 = (x12 | x24);
+ { uint32_t x26 = (x10 | x25);
+ { uint32_t x27 = (x8 | x26);
+ { uint32_t x28 = (x6 | x27);
+ { uint32_t x29 = (x4 | x28);
+ { uint32_t x30 = (x2 | x29); /* x30 = in1[0] | in1[1] | ... | in1[10] */
+ out[0] = x30;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e338m15/feadd.c b/src/Specific/montgomery32_2e338m15/feadd.c
index 6f3769ede..e50596dff 100644
--- a/src/Specific/montgomery32_2e338m15/feadd.c
+++ b/src/Specific/montgomery32_2e338m15/feadd.c
@@ -1,66 +1,70 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)
-{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45);
-{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
-{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
-{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75);
-{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffff1, &x78);
-{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
-{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
-{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
-{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
-{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
-{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x3ffff, &x108);
-{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_);
-{ uint32_t x113 = cmovznz(x112, x108, x75);
-{ uint32_t x114 = cmovznz(x112, x105, x72);
-{ uint32_t x115 = cmovznz(x112, x102, x69);
-{ uint32_t x116 = cmovznz(x112, x99, x66);
-{ uint32_t x117 = cmovznz(x112, x96, x63);
-{ uint32_t x118 = cmovznz(x112, x93, x60);
-{ uint32_t x119 = cmovznz(x112, x90, x57);
-{ uint32_t x120 = cmovznz(x112, x87, x54);
-{ uint32_t x121 = cmovznz(x112, x84, x51);
-{ uint32_t x122 = cmovznz(x112, x81, x48);
-{ uint32_t x123 = cmovznz(x112, x78, x45);
-out[0] = x113;
-out[1] = x114;
-out[2] = x115;
-out[3] = x116;
-out[4] = x117;
-out[5] = x118;
-out[6] = x119;
-out[7] = x120;
-out[8] = x121;
-out[9] = x122;
-out[10] = x123;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void feadd(uint32_t out[11], const uint32_t in1[11], const uint32_t in2[11]) { /* Adds the 11-word values in1 and in2 (index 0 is the first word of the carry chain, i.e. least significant), also computes that sum minus the constant 2^338 - 15, and stores one of the two 11-word candidates in out. */
+ { const uint32_t x22 = in1[10]; /* load in1, highest index first */
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x42 = in2[10]; /* load in2 in the same order */
+ { const uint32_t x43 = in2[9];
+ { const uint32_t x41 = in2[8];
+ { const uint32_t x39 = in2[7];
+ { const uint32_t x37 = in2[6];
+ { const uint32_t x35 = in2[5];
+ { const uint32_t x33 = in2[4];
+ { const uint32_t x31 = in2[3];
+ { const uint32_t x29 = in2[2];
+ { const uint32_t x27 = in2[1];
+ { const uint32_t x25 = in2[0];
+ { uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45); /* add chain: in1[i] + in2[i] starting at i = 0; each returned carry (x46, x49, ...) feeds the next call */
+ { uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48);
+ { uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+ { uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75); /* x76 is the carry out of the top word */
+ { uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffff1, &x78); /* borrow chain: sum minus the constant with words 0xfffffff1, nine times 0xffffffff, 0x3ffff, which is 2^338 - 15 */
+ { uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+ { uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+ { uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+ { uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+ { uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+ { uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x3ffff, &x108);
+ { uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_); /* extends the borrow chain over the add carry-out x76; only the final borrow x112 is kept, the difference word is discarded */
+ { uint32_t x113 = cmovznz(x112, x108, x75); /* per-word choice between the subtracted word and the raw sum word, keyed on x112. NOTE(review): cmovznz is defined outside the visible lines; presumably it yields its 2nd argument when the flag is 0 and its 3rd otherwise -- confirm */
+ { uint32_t x114 = cmovznz(x112, x105, x72);
+ { uint32_t x115 = cmovznz(x112, x102, x69);
+ { uint32_t x116 = cmovznz(x112, x99, x66);
+ { uint32_t x117 = cmovznz(x112, x96, x63);
+ { uint32_t x118 = cmovznz(x112, x93, x60);
+ { uint32_t x119 = cmovznz(x112, x90, x57);
+ { uint32_t x120 = cmovznz(x112, x87, x54);
+ { uint32_t x121 = cmovznz(x112, x84, x51);
+ { uint32_t x122 = cmovznz(x112, x81, x48);
+ { uint32_t x123 = cmovznz(x112, x78, x45);
+ out[0] = x123; /* out[i] receives the word derived from in1[i] + in2[i], so out uses the same word order as the inputs */
+ out[1] = x122;
+ out[2] = x121;
+ out[3] = x120;
+ out[4] = x119;
+ out[5] = x118;
+ out[6] = x117;
+ out[7] = x116;
+ out[8] = x115;
+ out[9] = x114;
+ out[10] = x113;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e338m15/fenz.c b/src/Specific/montgomery32_2e338m15/fenz.c
index 9e77ab0f6..d962eda51 100644
--- a/src/Specific/montgomery32_2e338m15/fenz.c
+++ b/src/Specific/montgomery32_2e338m15/fenz.c
@@ -1,32 +1,25 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x21 = (x20 | x19);
-{ uint32_t x22 = (x18 | x21);
-{ uint32_t x23 = (x16 | x22);
-{ uint32_t x24 = (x14 | x23);
-{ uint32_t x25 = (x12 | x24);
-{ uint32_t x26 = (x10 | x25);
-{ uint32_t x27 = (x8 | x26);
-{ uint32_t x28 = (x6 | x27);
-{ uint32_t x29 = (x4 | x28);
-{ uint32_t x30 = (x2 | x29);
-out[0] = x30;
-}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[11]) { /* Non-zero test: writes the bitwise OR of all 11 words of in1 to out[0], so out[0] is 0 exactly when every input word is 0. NOTE(review): ReturnType is not defined in the visible lines -- presumably a macro or a generator placeholder; confirm it expands to something valid. */
+ { const uint32_t x19 = in1[10]; /* words are loaded from the highest index down to in1[0] */
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x21 = (x20 | x19); /* OR chain: each following step folds one more word into the running value */
+ { uint32_t x22 = (x18 | x21);
+ { uint32_t x23 = (x16 | x22);
+ { uint32_t x24 = (x14 | x23);
+ { uint32_t x25 = (x12 | x24);
+ { uint32_t x26 = (x10 | x25);
+ { uint32_t x27 = (x8 | x26);
+ { uint32_t x28 = (x6 | x27);
+ { uint32_t x29 = (x4 | x28);
+ { uint32_t x30 = (x2 | x29); /* x30 = in1[0] | in1[1] | ... | in1[10] */
+ out[0] = x30;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e369m25/feadd.c b/src/Specific/montgomery32_2e369m25/feadd.c
index 340cf25c9..00c28d2ea 100644
--- a/src/Specific/montgomery32_2e369m25/feadd.c
+++ b/src/Specific/montgomery32_2e369m25/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffe7, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x1ffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) { /* Adds the 12-word values in1 and in2 (index 0 is the first word of the carry chain, i.e. least significant), also computes that sum minus the constant 2^369 - 25, and stores one of the two 12-word candidates in out. */
+ { const uint32_t x24 = in1[11]; /* load in1, highest index first */
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11]; /* load in2 in the same order */
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); /* add chain: in1[i] + in2[i] starting at i = 0; each returned carry (x50, x53, ...) feeds the next call */
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); /* x83 is the carry out of the top word */
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffe7, &x85); /* borrow chain: sum minus the constant with words 0xffffffe7, ten times 0xffffffff, 0x1ffff, which is 2^369 - 25 */
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x1ffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); /* extends the borrow chain over the add carry-out x83; only the final borrow x122 is kept, the difference word is discarded */
+ { uint32_t x123 = cmovznz(x122, x118, x82); /* per-word choice between the subtracted word and the raw sum word, keyed on x122. NOTE(review): cmovznz is defined outside the visible lines; presumably it yields its 2nd argument when the flag is 0 and its 3rd otherwise -- confirm */
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134; /* out[i] receives the word derived from in1[i] + in2[i], so out uses the same word order as the inputs */
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e369m25/fenz.c b/src/Specific/montgomery32_2e369m25/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e369m25/fenz.c
+++ b/src/Specific/montgomery32_2e369m25/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) { /* Non-zero test: writes the bitwise OR of all 12 words of in1 to out[0], so out[0] is 0 exactly when every input word is 0. NOTE(review): ReturnType is not defined in the visible lines -- presumably a macro or a generator placeholder; confirm it expands to something valid. */
+ { const uint32_t x21 = in1[11]; /* words are loaded from the highest index down to in1[0] */
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21); /* OR chain: each following step folds one more word into the running value */
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32); /* x33 = in1[0] | in1[1] | ... | in1[11] */
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e379m19/feadd.c b/src/Specific/montgomery32_2e379m19/feadd.c
index b26447d8e..985e02577 100644
--- a/src/Specific/montgomery32_2e379m19/feadd.c
+++ b/src/Specific/montgomery32_2e379m19/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffed, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7ffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) { /* Adds the 12-word values in1 and in2 (index 0 is the first word of the carry chain, i.e. least significant), also computes that sum minus the constant 2^379 - 19, and stores one of the two 12-word candidates in out. */
+ { const uint32_t x24 = in1[11]; /* load in1, highest index first */
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11]; /* load in2 in the same order */
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); /* add chain: in1[i] + in2[i] starting at i = 0; each returned carry (x50, x53, ...) feeds the next call */
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); /* x83 is the carry out of the top word */
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffed, &x85); /* borrow chain: sum minus the constant with words 0xffffffed, ten times 0xffffffff, 0x7ffffff, which is 2^379 - 19 */
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7ffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); /* extends the borrow chain over the add carry-out x83; only the final borrow x122 is kept, the difference word is discarded */
+ { uint32_t x123 = cmovznz(x122, x118, x82); /* per-word choice between the subtracted word and the raw sum word, keyed on x122. NOTE(review): cmovznz is defined outside the visible lines; presumably it yields its 2nd argument when the flag is 0 and its 3rd otherwise -- confirm */
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134; /* out[i] receives the word derived from in1[i] + in2[i], so out uses the same word order as the inputs */
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e379m19/fenz.c b/src/Specific/montgomery32_2e379m19/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e379m19/fenz.c
+++ b/src/Specific/montgomery32_2e379m19/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) { /* Non-zero test: writes the bitwise OR of all 12 words of in1 to out[0], so out[0] is 0 exactly when every input word is 0. NOTE(review): ReturnType is not defined in the visible lines -- presumably a macro or a generator placeholder; confirm it expands to something valid. */
+ { const uint32_t x21 = in1[11]; /* words are loaded from the highest index down to in1[0] */
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21); /* OR chain: each following step folds one more word into the running value */
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32); /* x33 = in1[0] | in1[1] | ... | in1[11] */
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e382m105/feadd.c b/src/Specific/montgomery32_2e382m105/feadd.c
index 3a2ec01f0..fef83b2bc 100644
--- a/src/Specific/montgomery32_2e382m105/feadd.c
+++ b/src/Specific/montgomery32_2e382m105/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffff97, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x3fffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffff97, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x3fffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e382m105/fenz.c b/src/Specific/montgomery32_2e382m105/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e382m105/fenz.c
+++ b/src/Specific/montgomery32_2e382m105/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e383m187/feadd.c b/src/Specific/montgomery32_2e383m187/feadd.c
index da64dfe0c..6af8e69b1 100644
--- a/src/Specific/montgomery32_2e383m187/feadd.c
+++ b/src/Specific/montgomery32_2e383m187/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffff45, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffff45, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e383m187/fenz.c b/src/Specific/montgomery32_2e383m187/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e383m187/fenz.c
+++ b/src/Specific/montgomery32_2e383m187/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e383m31/feadd.c b/src/Specific/montgomery32_2e383m31/feadd.c
index 39320467e..464a9aa36 100644
--- a/src/Specific/montgomery32_2e383m31/feadd.c
+++ b/src/Specific/montgomery32_2e383m31/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffe1, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffe1, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e383m31/fenz.c b/src/Specific/montgomery32_2e383m31/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e383m31/fenz.c
+++ b/src/Specific/montgomery32_2e383m31/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e383m421/feadd.c b/src/Specific/montgomery32_2e383m421/feadd.c
index c299cb16e..e20d02b97 100644
--- a/src/Specific/montgomery32_2e383m421/feadd.c
+++ b/src/Specific/montgomery32_2e383m421/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xfffffe5b, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xfffffe5b, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e383m421/fenz.c b/src/Specific/montgomery32_2e383m421/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e383m421/fenz.c
+++ b/src/Specific/montgomery32_2e383m421/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c
index a39adecf1..c502564e3 100644
--- a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c
+++ b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0x0, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0x0, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xfffffffe, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xffffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0x0, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0x0, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xfffffffe, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xffffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c
+++ b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m317/feadd.c b/src/Specific/montgomery32_2e384m317/feadd.c
index a527b27f5..531c2f325 100644
--- a/src/Specific/montgomery32_2e384m317/feadd.c
+++ b/src/Specific/montgomery32_2e384m317/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xfffffec3, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xffffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xfffffec3, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xffffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m317/fenz.c b/src/Specific/montgomery32_2e384m317/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e384m317/fenz.c
+++ b/src/Specific/montgomery32_2e384m317/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c b/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c
index 06a36e89b..b987e3baa 100644
--- a/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c
+++ b/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xfffaffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xfffaffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c b/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c
+++ b/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c b/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c
index f52ce38ec..7ef8740fc 100644
--- a/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c
+++ b/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c
@@ -1,70 +1,76 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
-{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
-{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
-{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
-{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85);
-{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
-{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
-{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
-{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
-{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
-{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xb0ffffff, &x118);
-{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
-{ uint32_t x123 = cmovznz(x122, x118, x82);
-{ uint32_t x124 = cmovznz(x122, x115, x79);
-{ uint32_t x125 = cmovznz(x122, x112, x76);
-{ uint32_t x126 = cmovznz(x122, x109, x73);
-{ uint32_t x127 = cmovznz(x122, x106, x70);
-{ uint32_t x128 = cmovznz(x122, x103, x67);
-{ uint32_t x129 = cmovznz(x122, x100, x64);
-{ uint32_t x130 = cmovznz(x122, x97, x61);
-{ uint32_t x131 = cmovznz(x122, x94, x58);
-{ uint32_t x132 = cmovznz(x122, x91, x55);
-{ uint32_t x133 = cmovznz(x122, x88, x52);
-{ uint32_t x134 = cmovznz(x122, x85, x49);
-out[0] = x123;
-out[1] = x124;
-out[2] = x125;
-out[3] = x126;
-out[4] = x127;
-out[5] = x128;
-out[6] = x129;
-out[7] = x130;
-out[8] = x131;
-out[9] = x132;
-out[10] = x133;
-out[11] = x134;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void feadd(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49);
+ { uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+ { uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+ { uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82);
+ { uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85);
+ { uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+ { uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+ { uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+ { uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+ { uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+ { uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xb0ffffff, &x118);
+ { uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_);
+ { uint32_t x123 = cmovznz(x122, x118, x82);
+ { uint32_t x124 = cmovznz(x122, x115, x79);
+ { uint32_t x125 = cmovznz(x122, x112, x76);
+ { uint32_t x126 = cmovznz(x122, x109, x73);
+ { uint32_t x127 = cmovznz(x122, x106, x70);
+ { uint32_t x128 = cmovznz(x122, x103, x67);
+ { uint32_t x129 = cmovznz(x122, x100, x64);
+ { uint32_t x130 = cmovznz(x122, x97, x61);
+ { uint32_t x131 = cmovznz(x122, x94, x58);
+ { uint32_t x132 = cmovznz(x122, x91, x55);
+ { uint32_t x133 = cmovznz(x122, x88, x52);
+ { uint32_t x134 = cmovznz(x122, x85, x49);
+ out[0] = x134;
+ out[1] = x133;
+ out[2] = x132;
+ out[3] = x131;
+ out[4] = x130;
+ out[5] = x129;
+ out[6] = x128;
+ out[7] = x127;
+ out[8] = x126;
+ out[9] = x125;
+ out[10] = x124;
+ out[11] = x123;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c b/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c
index 047ff14c3..61ca36c29 100644
--- a/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c
+++ b/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c
@@ -1,33 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x23 = (x22 | x21);
-{ uint32_t x24 = (x20 | x23);
-{ uint32_t x25 = (x18 | x24);
-{ uint32_t x26 = (x16 | x25);
-{ uint32_t x27 = (x14 | x26);
-{ uint32_t x28 = (x12 | x27);
-{ uint32_t x29 = (x10 | x28);
-{ uint32_t x30 = (x8 | x29);
-{ uint32_t x31 = (x6 | x30);
-{ uint32_t x32 = (x4 | x31);
-{ uint32_t x33 = (x2 | x32);
-out[0] = x33;
-}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x23 = (x22 | x21);
+ { uint32_t x24 = (x20 | x23);
+ { uint32_t x25 = (x18 | x24);
+ { uint32_t x26 = (x16 | x25);
+ { uint32_t x27 = (x14 | x26);
+ { uint32_t x28 = (x12 | x27);
+ { uint32_t x29 = (x10 | x28);
+ { uint32_t x30 = (x8 | x29);
+ { uint32_t x31 = (x6 | x30);
+ { uint32_t x32 = (x4 | x31);
+ { uint32_t x33 = (x2 | x32);
+ out[0] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e389m21/feadd.c b/src/Specific/montgomery32_2e389m21/feadd.c
index d4266a546..f84bbc134 100644
--- a/src/Specific/montgomery32_2e389m21/feadd.c
+++ b/src/Specific/montgomery32_2e389m21/feadd.c
@@ -1,74 +1,82 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29)
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
-{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffeb, &x92);
-{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
-{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
-{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
-{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
-{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
-{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1f, &x128);
-{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
-{ uint32_t x133 = cmovznz(x132, x128, x89);
-{ uint32_t x134 = cmovznz(x132, x125, x86);
-{ uint32_t x135 = cmovznz(x132, x122, x83);
-{ uint32_t x136 = cmovznz(x132, x119, x80);
-{ uint32_t x137 = cmovznz(x132, x116, x77);
-{ uint32_t x138 = cmovznz(x132, x113, x74);
-{ uint32_t x139 = cmovznz(x132, x110, x71);
-{ uint32_t x140 = cmovznz(x132, x107, x68);
-{ uint32_t x141 = cmovznz(x132, x104, x65);
-{ uint32_t x142 = cmovznz(x132, x101, x62);
-{ uint32_t x143 = cmovznz(x132, x98, x59);
-{ uint32_t x144 = cmovznz(x132, x95, x56);
-{ uint32_t x145 = cmovznz(x132, x92, x53);
-out[0] = x133;
-out[1] = x134;
-out[2] = x135;
-out[3] = x136;
-out[4] = x137;
-out[5] = x138;
-out[6] = x139;
-out[7] = x140;
-out[8] = x141;
-out[9] = x142;
-out[10] = x143;
-out[11] = x144;
-out[12] = x145;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void feadd(uint32_t out[13], const uint32_t in1[13], const uint32_t in2[13]) {
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x50 = in2[12];
+ { const uint32_t x51 = in2[11];
+ { const uint32_t x49 = in2[10];
+ { const uint32_t x47 = in2[9];
+ { const uint32_t x45 = in2[8];
+ { const uint32_t x43 = in2[7];
+ { const uint32_t x41 = in2[6];
+ { const uint32_t x39 = in2[5];
+ { const uint32_t x37 = in2[4];
+ { const uint32_t x35 = in2[3];
+ { const uint32_t x33 = in2[2];
+ { const uint32_t x31 = in2[1];
+ { const uint32_t x29 = in2[0];
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
+ { uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffeb, &x92);
+ { uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+ { uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+ { uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+ { uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+ { uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+ { uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1f, &x128);
+ { uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
+ { uint32_t x133 = cmovznz(x132, x128, x89);
+ { uint32_t x134 = cmovznz(x132, x125, x86);
+ { uint32_t x135 = cmovznz(x132, x122, x83);
+ { uint32_t x136 = cmovznz(x132, x119, x80);
+ { uint32_t x137 = cmovznz(x132, x116, x77);
+ { uint32_t x138 = cmovznz(x132, x113, x74);
+ { uint32_t x139 = cmovznz(x132, x110, x71);
+ { uint32_t x140 = cmovznz(x132, x107, x68);
+ { uint32_t x141 = cmovznz(x132, x104, x65);
+ { uint32_t x142 = cmovznz(x132, x101, x62);
+ { uint32_t x143 = cmovznz(x132, x98, x59);
+ { uint32_t x144 = cmovznz(x132, x95, x56);
+ { uint32_t x145 = cmovznz(x132, x92, x53);
+ out[0] = x145;
+ out[1] = x144;
+ out[2] = x143;
+ out[3] = x142;
+ out[4] = x141;
+ out[5] = x140;
+ out[6] = x139;
+ out[7] = x138;
+ out[8] = x137;
+ out[9] = x136;
+ out[10] = x135;
+ out[11] = x134;
+ out[12] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e389m21/fenz.c b/src/Specific/montgomery32_2e389m21/fenz.c
index 521513c0d..c34513bef 100644
--- a/src/Specific/montgomery32_2e389m21/fenz.c
+++ b/src/Specific/montgomery32_2e389m21/fenz.c
@@ -1,34 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x25 = (x24 | x23);
-{ uint32_t x26 = (x22 | x25);
-{ uint32_t x27 = (x20 | x26);
-{ uint32_t x28 = (x18 | x27);
-{ uint32_t x29 = (x16 | x28);
-{ uint32_t x30 = (x14 | x29);
-{ uint32_t x31 = (x12 | x30);
-{ uint32_t x32 = (x10 | x31);
-{ uint32_t x33 = (x8 | x32);
-{ uint32_t x34 = (x6 | x33);
-{ uint32_t x35 = (x4 | x34);
-{ uint32_t x36 = (x2 | x35);
-out[0] = x36;
-}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[13]) {
+ { const uint32_t x23 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x25 = (x24 | x23);
+ { uint32_t x26 = (x22 | x25);
+ { uint32_t x27 = (x20 | x26);
+ { uint32_t x28 = (x18 | x27);
+ { uint32_t x29 = (x16 | x28);
+ { uint32_t x30 = (x14 | x29);
+ { uint32_t x31 = (x12 | x30);
+ { uint32_t x32 = (x10 | x31);
+ { uint32_t x33 = (x8 | x32);
+ { uint32_t x34 = (x6 | x33);
+ { uint32_t x35 = (x4 | x34);
+ { uint32_t x36 = (x2 | x35);
+ out[0] = x36;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e401m31/feadd.c b/src/Specific/montgomery32_2e401m31/feadd.c
index cedd0ac77..b90810dd4 100644
--- a/src/Specific/montgomery32_2e401m31/feadd.c
+++ b/src/Specific/montgomery32_2e401m31/feadd.c
@@ -1,74 +1,82 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29)
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
-{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffe1, &x92);
-{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
-{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
-{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
-{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
-{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
-{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1ffff, &x128);
-{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
-{ uint32_t x133 = cmovznz(x132, x128, x89);
-{ uint32_t x134 = cmovznz(x132, x125, x86);
-{ uint32_t x135 = cmovznz(x132, x122, x83);
-{ uint32_t x136 = cmovznz(x132, x119, x80);
-{ uint32_t x137 = cmovznz(x132, x116, x77);
-{ uint32_t x138 = cmovznz(x132, x113, x74);
-{ uint32_t x139 = cmovznz(x132, x110, x71);
-{ uint32_t x140 = cmovznz(x132, x107, x68);
-{ uint32_t x141 = cmovznz(x132, x104, x65);
-{ uint32_t x142 = cmovznz(x132, x101, x62);
-{ uint32_t x143 = cmovznz(x132, x98, x59);
-{ uint32_t x144 = cmovznz(x132, x95, x56);
-{ uint32_t x145 = cmovznz(x132, x92, x53);
-out[0] = x133;
-out[1] = x134;
-out[2] = x135;
-out[3] = x136;
-out[4] = x137;
-out[5] = x138;
-out[6] = x139;
-out[7] = x140;
-out[8] = x141;
-out[9] = x142;
-out[10] = x143;
-out[11] = x144;
-out[12] = x145;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void feadd(uint32_t out[13], const uint32_t in1[13], const uint32_t in2[13]) {
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x50 = in2[12];
+ { const uint32_t x51 = in2[11];
+ { const uint32_t x49 = in2[10];
+ { const uint32_t x47 = in2[9];
+ { const uint32_t x45 = in2[8];
+ { const uint32_t x43 = in2[7];
+ { const uint32_t x41 = in2[6];
+ { const uint32_t x39 = in2[5];
+ { const uint32_t x37 = in2[4];
+ { const uint32_t x35 = in2[3];
+ { const uint32_t x33 = in2[2];
+ { const uint32_t x31 = in2[1];
+ { const uint32_t x29 = in2[0];
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
+ { uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffe1, &x92);
+ { uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+ { uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+ { uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+ { uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+ { uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+ { uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1ffff, &x128);
+ { uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
+ { uint32_t x133 = cmovznz(x132, x128, x89);
+ { uint32_t x134 = cmovznz(x132, x125, x86);
+ { uint32_t x135 = cmovznz(x132, x122, x83);
+ { uint32_t x136 = cmovznz(x132, x119, x80);
+ { uint32_t x137 = cmovznz(x132, x116, x77);
+ { uint32_t x138 = cmovznz(x132, x113, x74);
+ { uint32_t x139 = cmovznz(x132, x110, x71);
+ { uint32_t x140 = cmovznz(x132, x107, x68);
+ { uint32_t x141 = cmovznz(x132, x104, x65);
+ { uint32_t x142 = cmovznz(x132, x101, x62);
+ { uint32_t x143 = cmovznz(x132, x98, x59);
+ { uint32_t x144 = cmovznz(x132, x95, x56);
+ { uint32_t x145 = cmovznz(x132, x92, x53);
+ out[0] = x145;
+ out[1] = x144;
+ out[2] = x143;
+ out[3] = x142;
+ out[4] = x141;
+ out[5] = x140;
+ out[6] = x139;
+ out[7] = x138;
+ out[8] = x137;
+ out[9] = x136;
+ out[10] = x135;
+ out[11] = x134;
+ out[12] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e401m31/fenz.c b/src/Specific/montgomery32_2e401m31/fenz.c
index 521513c0d..c34513bef 100644
--- a/src/Specific/montgomery32_2e401m31/fenz.c
+++ b/src/Specific/montgomery32_2e401m31/fenz.c
@@ -1,34 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x25 = (x24 | x23);
-{ uint32_t x26 = (x22 | x25);
-{ uint32_t x27 = (x20 | x26);
-{ uint32_t x28 = (x18 | x27);
-{ uint32_t x29 = (x16 | x28);
-{ uint32_t x30 = (x14 | x29);
-{ uint32_t x31 = (x12 | x30);
-{ uint32_t x32 = (x10 | x31);
-{ uint32_t x33 = (x8 | x32);
-{ uint32_t x34 = (x6 | x33);
-{ uint32_t x35 = (x4 | x34);
-{ uint32_t x36 = (x2 | x35);
-out[0] = x36;
-}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[13]) {
+ { const uint32_t x23 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x25 = (x24 | x23);
+ { uint32_t x26 = (x22 | x25);
+ { uint32_t x27 = (x20 | x26);
+ { uint32_t x28 = (x18 | x27);
+ { uint32_t x29 = (x16 | x28);
+ { uint32_t x30 = (x14 | x29);
+ { uint32_t x31 = (x12 | x30);
+ { uint32_t x32 = (x10 | x31);
+ { uint32_t x33 = (x8 | x32);
+ { uint32_t x34 = (x6 | x33);
+ { uint32_t x35 = (x4 | x34);
+ { uint32_t x36 = (x2 | x35);
+ out[0] = x36;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e413m21/feadd.c b/src/Specific/montgomery32_2e413m21/feadd.c
index 9d4ce928b..721eff4d6 100644
--- a/src/Specific/montgomery32_2e413m21/feadd.c
+++ b/src/Specific/montgomery32_2e413m21/feadd.c
@@ -1,74 +1,82 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29)
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
-{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffeb, &x92);
-{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
-{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
-{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
-{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
-{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
-{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1fffffff, &x128);
-{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
-{ uint32_t x133 = cmovznz(x132, x128, x89);
-{ uint32_t x134 = cmovznz(x132, x125, x86);
-{ uint32_t x135 = cmovznz(x132, x122, x83);
-{ uint32_t x136 = cmovznz(x132, x119, x80);
-{ uint32_t x137 = cmovznz(x132, x116, x77);
-{ uint32_t x138 = cmovznz(x132, x113, x74);
-{ uint32_t x139 = cmovznz(x132, x110, x71);
-{ uint32_t x140 = cmovznz(x132, x107, x68);
-{ uint32_t x141 = cmovznz(x132, x104, x65);
-{ uint32_t x142 = cmovznz(x132, x101, x62);
-{ uint32_t x143 = cmovznz(x132, x98, x59);
-{ uint32_t x144 = cmovznz(x132, x95, x56);
-{ uint32_t x145 = cmovznz(x132, x92, x53);
-out[0] = x133;
-out[1] = x134;
-out[2] = x135;
-out[3] = x136;
-out[4] = x137;
-out[5] = x138;
-out[6] = x139;
-out[7] = x140;
-out[8] = x141;
-out[9] = x142;
-out[10] = x143;
-out[11] = x144;
-out[12] = x145;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void feadd(uint32_t out[13], const uint32_t in1[13], const uint32_t in2[13]) {
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x50 = in2[12];
+ { const uint32_t x51 = in2[11];
+ { const uint32_t x49 = in2[10];
+ { const uint32_t x47 = in2[9];
+ { const uint32_t x45 = in2[8];
+ { const uint32_t x43 = in2[7];
+ { const uint32_t x41 = in2[6];
+ { const uint32_t x39 = in2[5];
+ { const uint32_t x37 = in2[4];
+ { const uint32_t x35 = in2[3];
+ { const uint32_t x33 = in2[2];
+ { const uint32_t x31 = in2[1];
+ { const uint32_t x29 = in2[0];
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
+ { uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffeb, &x92);
+ { uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+ { uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+ { uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+ { uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+ { uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+ { uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1fffffff, &x128);
+ { uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
+ { uint32_t x133 = cmovznz(x132, x128, x89);
+ { uint32_t x134 = cmovznz(x132, x125, x86);
+ { uint32_t x135 = cmovznz(x132, x122, x83);
+ { uint32_t x136 = cmovznz(x132, x119, x80);
+ { uint32_t x137 = cmovznz(x132, x116, x77);
+ { uint32_t x138 = cmovznz(x132, x113, x74);
+ { uint32_t x139 = cmovznz(x132, x110, x71);
+ { uint32_t x140 = cmovznz(x132, x107, x68);
+ { uint32_t x141 = cmovznz(x132, x104, x65);
+ { uint32_t x142 = cmovznz(x132, x101, x62);
+ { uint32_t x143 = cmovznz(x132, x98, x59);
+ { uint32_t x144 = cmovznz(x132, x95, x56);
+ { uint32_t x145 = cmovznz(x132, x92, x53);
+ out[0] = x145;
+ out[1] = x144;
+ out[2] = x143;
+ out[3] = x142;
+ out[4] = x141;
+ out[5] = x140;
+ out[6] = x139;
+ out[7] = x138;
+ out[8] = x137;
+ out[9] = x136;
+ out[10] = x135;
+ out[11] = x134;
+ out[12] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e413m21/fenz.c b/src/Specific/montgomery32_2e413m21/fenz.c
index 521513c0d..c34513bef 100644
--- a/src/Specific/montgomery32_2e413m21/fenz.c
+++ b/src/Specific/montgomery32_2e413m21/fenz.c
@@ -1,34 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x25 = (x24 | x23);
-{ uint32_t x26 = (x22 | x25);
-{ uint32_t x27 = (x20 | x26);
-{ uint32_t x28 = (x18 | x27);
-{ uint32_t x29 = (x16 | x28);
-{ uint32_t x30 = (x14 | x29);
-{ uint32_t x31 = (x12 | x30);
-{ uint32_t x32 = (x10 | x31);
-{ uint32_t x33 = (x8 | x32);
-{ uint32_t x34 = (x6 | x33);
-{ uint32_t x35 = (x4 | x34);
-{ uint32_t x36 = (x2 | x35);
-out[0] = x36;
-}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[13]) {
+ { const uint32_t x23 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x25 = (x24 | x23);
+ { uint32_t x26 = (x22 | x25);
+ { uint32_t x27 = (x20 | x26);
+ { uint32_t x28 = (x18 | x27);
+ { uint32_t x29 = (x16 | x28);
+ { uint32_t x30 = (x14 | x29);
+ { uint32_t x31 = (x12 | x30);
+ { uint32_t x32 = (x10 | x31);
+ { uint32_t x33 = (x8 | x32);
+ { uint32_t x34 = (x6 | x33);
+ { uint32_t x35 = (x4 | x34);
+ { uint32_t x36 = (x2 | x35);
+ out[0] = x36;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e414m17/feadd.c b/src/Specific/montgomery32_2e414m17/feadd.c
index f079eef7b..e84e3c240 100644
--- a/src/Specific/montgomery32_2e414m17/feadd.c
+++ b/src/Specific/montgomery32_2e414m17/feadd.c
@@ -1,74 +1,82 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29)
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
-{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffef, &x92);
-{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
-{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
-{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
-{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
-{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
-{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x3fffffff, &x128);
-{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
-{ uint32_t x133 = cmovznz(x132, x128, x89);
-{ uint32_t x134 = cmovznz(x132, x125, x86);
-{ uint32_t x135 = cmovznz(x132, x122, x83);
-{ uint32_t x136 = cmovznz(x132, x119, x80);
-{ uint32_t x137 = cmovznz(x132, x116, x77);
-{ uint32_t x138 = cmovznz(x132, x113, x74);
-{ uint32_t x139 = cmovznz(x132, x110, x71);
-{ uint32_t x140 = cmovznz(x132, x107, x68);
-{ uint32_t x141 = cmovznz(x132, x104, x65);
-{ uint32_t x142 = cmovznz(x132, x101, x62);
-{ uint32_t x143 = cmovznz(x132, x98, x59);
-{ uint32_t x144 = cmovznz(x132, x95, x56);
-{ uint32_t x145 = cmovznz(x132, x92, x53);
-out[0] = x133;
-out[1] = x134;
-out[2] = x135;
-out[3] = x136;
-out[4] = x137;
-out[5] = x138;
-out[6] = x139;
-out[7] = x140;
-out[8] = x141;
-out[9] = x142;
-out[10] = x143;
-out[11] = x144;
-out[12] = x145;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void feadd(uint32_t out[13], const uint32_t in1[13], const uint32_t in2[13]) {
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x50 = in2[12];
+ { const uint32_t x51 = in2[11];
+ { const uint32_t x49 = in2[10];
+ { const uint32_t x47 = in2[9];
+ { const uint32_t x45 = in2[8];
+ { const uint32_t x43 = in2[7];
+ { const uint32_t x41 = in2[6];
+ { const uint32_t x39 = in2[5];
+ { const uint32_t x37 = in2[4];
+ { const uint32_t x35 = in2[3];
+ { const uint32_t x33 = in2[2];
+ { const uint32_t x31 = in2[1];
+ { const uint32_t x29 = in2[0];
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
+ { uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffef, &x92);
+ { uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+ { uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+ { uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+ { uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+ { uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+ { uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x3fffffff, &x128);
+ { uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
+ { uint32_t x133 = cmovznz(x132, x128, x89);
+ { uint32_t x134 = cmovznz(x132, x125, x86);
+ { uint32_t x135 = cmovznz(x132, x122, x83);
+ { uint32_t x136 = cmovznz(x132, x119, x80);
+ { uint32_t x137 = cmovznz(x132, x116, x77);
+ { uint32_t x138 = cmovznz(x132, x113, x74);
+ { uint32_t x139 = cmovznz(x132, x110, x71);
+ { uint32_t x140 = cmovznz(x132, x107, x68);
+ { uint32_t x141 = cmovznz(x132, x104, x65);
+ { uint32_t x142 = cmovznz(x132, x101, x62);
+ { uint32_t x143 = cmovznz(x132, x98, x59);
+ { uint32_t x144 = cmovznz(x132, x95, x56);
+ { uint32_t x145 = cmovznz(x132, x92, x53);
+ out[0] = x145;
+ out[1] = x144;
+ out[2] = x143;
+ out[3] = x142;
+ out[4] = x141;
+ out[5] = x140;
+ out[6] = x139;
+ out[7] = x138;
+ out[8] = x137;
+ out[9] = x136;
+ out[10] = x135;
+ out[11] = x134;
+ out[12] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e414m17/fenz.c b/src/Specific/montgomery32_2e414m17/fenz.c
index 521513c0d..c34513bef 100644
--- a/src/Specific/montgomery32_2e414m17/fenz.c
+++ b/src/Specific/montgomery32_2e414m17/fenz.c
@@ -1,34 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x25 = (x24 | x23);
-{ uint32_t x26 = (x22 | x25);
-{ uint32_t x27 = (x20 | x26);
-{ uint32_t x28 = (x18 | x27);
-{ uint32_t x29 = (x16 | x28);
-{ uint32_t x30 = (x14 | x29);
-{ uint32_t x31 = (x12 | x30);
-{ uint32_t x32 = (x10 | x31);
-{ uint32_t x33 = (x8 | x32);
-{ uint32_t x34 = (x6 | x33);
-{ uint32_t x35 = (x4 | x34);
-{ uint32_t x36 = (x2 | x35);
-out[0] = x36;
-}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[13]) {
+ { const uint32_t x23 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x25 = (x24 | x23);
+ { uint32_t x26 = (x22 | x25);
+ { uint32_t x27 = (x20 | x26);
+ { uint32_t x28 = (x18 | x27);
+ { uint32_t x29 = (x16 | x28);
+ { uint32_t x30 = (x14 | x29);
+ { uint32_t x31 = (x12 | x30);
+ { uint32_t x32 = (x10 | x31);
+ { uint32_t x33 = (x8 | x32);
+ { uint32_t x34 = (x6 | x33);
+ { uint32_t x35 = (x4 | x34);
+ { uint32_t x36 = (x2 | x35);
+ out[0] = x36;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e416m2e208m1/feadd.c b/src/Specific/montgomery32_2e416m2e208m1/feadd.c
index 2e2850e48..18bca8b9a 100644
--- a/src/Specific/montgomery32_2e416m2e208m1/feadd.c
+++ b/src/Specific/montgomery32_2e416m2e208m1/feadd.c
@@ -1,74 +1,82 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29)
-{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
-{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
-{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
-{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
-{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffff, &x92);
-{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
-{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
-{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
-{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
-{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
-{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xfffeffff, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0xffffffff, &x128);
-{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
-{ uint32_t x133 = cmovznz(x132, x128, x89);
-{ uint32_t x134 = cmovznz(x132, x125, x86);
-{ uint32_t x135 = cmovznz(x132, x122, x83);
-{ uint32_t x136 = cmovznz(x132, x119, x80);
-{ uint32_t x137 = cmovznz(x132, x116, x77);
-{ uint32_t x138 = cmovznz(x132, x113, x74);
-{ uint32_t x139 = cmovznz(x132, x110, x71);
-{ uint32_t x140 = cmovznz(x132, x107, x68);
-{ uint32_t x141 = cmovznz(x132, x104, x65);
-{ uint32_t x142 = cmovznz(x132, x101, x62);
-{ uint32_t x143 = cmovznz(x132, x98, x59);
-{ uint32_t x144 = cmovznz(x132, x95, x56);
-{ uint32_t x145 = cmovznz(x132, x92, x53);
-out[0] = x133;
-out[1] = x134;
-out[2] = x135;
-out[3] = x136;
-out[4] = x137;
-out[5] = x138;
-out[6] = x139;
-out[7] = x140;
-out[8] = x141;
-out[9] = x142;
-out[10] = x143;
-out[11] = x144;
-out[12] = x145;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void feadd(uint32_t out[13], const uint32_t in1[13], const uint32_t in2[13]) {
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x50 = in2[12];
+ { const uint32_t x51 = in2[11];
+ { const uint32_t x49 = in2[10];
+ { const uint32_t x47 = in2[9];
+ { const uint32_t x45 = in2[8];
+ { const uint32_t x43 = in2[7];
+ { const uint32_t x41 = in2[6];
+ { const uint32_t x39 = in2[5];
+ { const uint32_t x37 = in2[4];
+ { const uint32_t x35 = in2[3];
+ { const uint32_t x33 = in2[2];
+ { const uint32_t x31 = in2[1];
+ { const uint32_t x29 = in2[0];
+ { uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53);
+ { uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+ { uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+ { uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89);
+ { uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffff, &x92);
+ { uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+ { uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+ { uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+ { uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+ { uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+ { uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xfffeffff, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0xffffffff, &x128);
+ { uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_);
+ { uint32_t x133 = cmovznz(x132, x128, x89);
+ { uint32_t x134 = cmovznz(x132, x125, x86);
+ { uint32_t x135 = cmovznz(x132, x122, x83);
+ { uint32_t x136 = cmovznz(x132, x119, x80);
+ { uint32_t x137 = cmovznz(x132, x116, x77);
+ { uint32_t x138 = cmovznz(x132, x113, x74);
+ { uint32_t x139 = cmovznz(x132, x110, x71);
+ { uint32_t x140 = cmovznz(x132, x107, x68);
+ { uint32_t x141 = cmovznz(x132, x104, x65);
+ { uint32_t x142 = cmovznz(x132, x101, x62);
+ { uint32_t x143 = cmovznz(x132, x98, x59);
+ { uint32_t x144 = cmovznz(x132, x95, x56);
+ { uint32_t x145 = cmovznz(x132, x92, x53);
+ out[0] = x145;
+ out[1] = x144;
+ out[2] = x143;
+ out[3] = x142;
+ out[4] = x141;
+ out[5] = x140;
+ out[6] = x139;
+ out[7] = x138;
+ out[8] = x137;
+ out[9] = x136;
+ out[10] = x135;
+ out[11] = x134;
+ out[12] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e416m2e208m1/fenz.c b/src/Specific/montgomery32_2e416m2e208m1/fenz.c
index 521513c0d..c34513bef 100644
--- a/src/Specific/montgomery32_2e416m2e208m1/fenz.c
+++ b/src/Specific/montgomery32_2e416m2e208m1/fenz.c
@@ -1,34 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x25 = (x24 | x23);
-{ uint32_t x26 = (x22 | x25);
-{ uint32_t x27 = (x20 | x26);
-{ uint32_t x28 = (x18 | x27);
-{ uint32_t x29 = (x16 | x28);
-{ uint32_t x30 = (x14 | x29);
-{ uint32_t x31 = (x12 | x30);
-{ uint32_t x32 = (x10 | x31);
-{ uint32_t x33 = (x8 | x32);
-{ uint32_t x34 = (x6 | x33);
-{ uint32_t x35 = (x4 | x34);
-{ uint32_t x36 = (x2 | x35);
-out[0] = x36;
-}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[13]) {
+ { const uint32_t x23 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x25 = (x24 | x23);
+ { uint32_t x26 = (x22 | x25);
+ { uint32_t x27 = (x20 | x26);
+ { uint32_t x28 = (x18 | x27);
+ { uint32_t x29 = (x16 | x28);
+ { uint32_t x30 = (x14 | x29);
+ { uint32_t x31 = (x12 | x30);
+ { uint32_t x32 = (x10 | x31);
+ { uint32_t x33 = (x8 | x32);
+ { uint32_t x34 = (x6 | x33);
+ { uint32_t x35 = (x4 | x34);
+ { uint32_t x36 = (x2 | x35);
+ out[0] = x36;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e444m17/feadd.c b/src/Specific/montgomery32_2e444m17/feadd.c
index f87eed2d7..07dfe2dff 100644
--- a/src/Specific/montgomery32_2e444m17/feadd.c
+++ b/src/Specific/montgomery32_2e444m17/feadd.c
@@ -1,78 +1,88 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31)
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(0x0, x5, x31, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x7, x33, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x9, x35, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x11, x37, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x13, x39, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x15, x41, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x17, x43, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x19, x45, &x78);
-{ uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x21, x47, &x81);
-{ uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x23, x49, &x84);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x25, x51, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x27, x53, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x29, x55, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x28, x54, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(0x0, x57, 0xffffffef, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x60, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x63, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x66, 0xffffffff, &x108);
-{ uint32_t x111; uint8_t x112 = _subborrow_u32(x109, x69, 0xffffffff, &x111);
-{ uint32_t x114; uint8_t x115 = _subborrow_u32(x112, x72, 0xffffffff, &x114);
-{ uint32_t x117; uint8_t x118 = _subborrow_u32(x115, x75, 0xffffffff, &x117);
-{ uint32_t x120; uint8_t x121 = _subborrow_u32(x118, x78, 0xffffffff, &x120);
-{ uint32_t x123; uint8_t x124 = _subborrow_u32(x121, x81, 0xffffffff, &x123);
-{ uint32_t x126; uint8_t x127 = _subborrow_u32(x124, x84, 0xffffffff, &x126);
-{ uint32_t x129; uint8_t x130 = _subborrow_u32(x127, x87, 0xffffffff, &x129);
-{ uint32_t x132; uint8_t x133 = _subborrow_u32(x130, x90, 0xffffffff, &x132);
-{ uint32_t x135; uint8_t x136 = _subborrow_u32(x133, x93, 0xffffffff, &x135);
-{ uint32_t x138; uint8_t x139 = _subborrow_u32(x136, x96, 0xfffffff, &x138);
-{ uint32_t _; uint8_t x142 = _subborrow_u32(x139, x97, 0x0, &_);
-{ uint32_t x143 = cmovznz(x142, x138, x96);
-{ uint32_t x144 = cmovznz(x142, x135, x93);
-{ uint32_t x145 = cmovznz(x142, x132, x90);
-{ uint32_t x146 = cmovznz(x142, x129, x87);
-{ uint32_t x147 = cmovznz(x142, x126, x84);
-{ uint32_t x148 = cmovznz(x142, x123, x81);
-{ uint32_t x149 = cmovznz(x142, x120, x78);
-{ uint32_t x150 = cmovznz(x142, x117, x75);
-{ uint32_t x151 = cmovznz(x142, x114, x72);
-{ uint32_t x152 = cmovznz(x142, x111, x69);
-{ uint32_t x153 = cmovznz(x142, x108, x66);
-{ uint32_t x154 = cmovznz(x142, x105, x63);
-{ uint32_t x155 = cmovznz(x142, x102, x60);
-{ uint32_t x156 = cmovznz(x142, x99, x57);
-out[0] = x143;
-out[1] = x144;
-out[2] = x145;
-out[3] = x146;
-out[4] = x147;
-out[5] = x148;
-out[6] = x149;
-out[7] = x150;
-out[8] = x151;
-out[9] = x152;
-out[10] = x153;
-out[11] = x154;
-out[12] = x155;
-out[13] = x156;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[14];
+static void feadd(uint32_t out[14], const uint32_t in1[14], const uint32_t in2[14]) {
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x54 = in2[13];
+ { const uint32_t x55 = in2[12];
+ { const uint32_t x53 = in2[11];
+ { const uint32_t x51 = in2[10];
+ { const uint32_t x49 = in2[9];
+ { const uint32_t x47 = in2[8];
+ { const uint32_t x45 = in2[7];
+ { const uint32_t x43 = in2[6];
+ { const uint32_t x41 = in2[5];
+ { const uint32_t x39 = in2[4];
+ { const uint32_t x37 = in2[3];
+ { const uint32_t x35 = in2[2];
+ { const uint32_t x33 = in2[1];
+ { const uint32_t x31 = in2[0];
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(0x0, x5, x31, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x7, x33, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x9, x35, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x11, x37, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x13, x39, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x15, x41, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x17, x43, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x19, x45, &x78);
+ { uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x21, x47, &x81);
+ { uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x23, x49, &x84);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x25, x51, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x27, x53, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x29, x55, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x28, x54, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(0x0, x57, 0xffffffef, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x60, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x63, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x66, 0xffffffff, &x108);
+ { uint32_t x111; uint8_t x112 = _subborrow_u32(x109, x69, 0xffffffff, &x111);
+ { uint32_t x114; uint8_t x115 = _subborrow_u32(x112, x72, 0xffffffff, &x114);
+ { uint32_t x117; uint8_t x118 = _subborrow_u32(x115, x75, 0xffffffff, &x117);
+ { uint32_t x120; uint8_t x121 = _subborrow_u32(x118, x78, 0xffffffff, &x120);
+ { uint32_t x123; uint8_t x124 = _subborrow_u32(x121, x81, 0xffffffff, &x123);
+ { uint32_t x126; uint8_t x127 = _subborrow_u32(x124, x84, 0xffffffff, &x126);
+ { uint32_t x129; uint8_t x130 = _subborrow_u32(x127, x87, 0xffffffff, &x129);
+ { uint32_t x132; uint8_t x133 = _subborrow_u32(x130, x90, 0xffffffff, &x132);
+ { uint32_t x135; uint8_t x136 = _subborrow_u32(x133, x93, 0xffffffff, &x135);
+ { uint32_t x138; uint8_t x139 = _subborrow_u32(x136, x96, 0xfffffff, &x138);
+ { uint32_t _; uint8_t x142 = _subborrow_u32(x139, x97, 0x0, &_);
+ { uint32_t x143 = cmovznz(x142, x138, x96);
+ { uint32_t x144 = cmovznz(x142, x135, x93);
+ { uint32_t x145 = cmovznz(x142, x132, x90);
+ { uint32_t x146 = cmovznz(x142, x129, x87);
+ { uint32_t x147 = cmovznz(x142, x126, x84);
+ { uint32_t x148 = cmovznz(x142, x123, x81);
+ { uint32_t x149 = cmovznz(x142, x120, x78);
+ { uint32_t x150 = cmovznz(x142, x117, x75);
+ { uint32_t x151 = cmovznz(x142, x114, x72);
+ { uint32_t x152 = cmovznz(x142, x111, x69);
+ { uint32_t x153 = cmovznz(x142, x108, x66);
+ { uint32_t x154 = cmovznz(x142, x105, x63);
+ { uint32_t x155 = cmovznz(x142, x102, x60);
+ { uint32_t x156 = cmovznz(x142, x99, x57);
+ out[0] = x156;
+ out[1] = x155;
+ out[2] = x154;
+ out[3] = x153;
+ out[4] = x152;
+ out[5] = x151;
+ out[6] = x150;
+ out[7] = x149;
+ out[8] = x148;
+ out[9] = x147;
+ out[10] = x146;
+ out[11] = x145;
+ out[12] = x144;
+ out[13] = x143;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e444m17/fenz.c b/src/Specific/montgomery32_2e444m17/fenz.c
index d631822fd..42fc27af3 100644
--- a/src/Specific/montgomery32_2e444m17/fenz.c
+++ b/src/Specific/montgomery32_2e444m17/fenz.c
@@ -1,35 +1,31 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x27 = (x26 | x25);
-{ uint32_t x28 = (x24 | x27);
-{ uint32_t x29 = (x22 | x28);
-{ uint32_t x30 = (x20 | x29);
-{ uint32_t x31 = (x18 | x30);
-{ uint32_t x32 = (x16 | x31);
-{ uint32_t x33 = (x14 | x32);
-{ uint32_t x34 = (x12 | x33);
-{ uint32_t x35 = (x10 | x34);
-{ uint32_t x36 = (x8 | x35);
-{ uint32_t x37 = (x6 | x36);
-{ uint32_t x38 = (x4 | x37);
-{ uint32_t x39 = (x2 | x38);
-out[0] = x39;
-}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[14]) {
+ { const uint32_t x25 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x27 = (x26 | x25);
+ { uint32_t x28 = (x24 | x27);
+ { uint32_t x29 = (x22 | x28);
+ { uint32_t x30 = (x20 | x29);
+ { uint32_t x31 = (x18 | x30);
+ { uint32_t x32 = (x16 | x31);
+ { uint32_t x33 = (x14 | x32);
+ { uint32_t x34 = (x12 | x33);
+ { uint32_t x35 = (x10 | x34);
+ { uint32_t x36 = (x8 | x35);
+ { uint32_t x37 = (x6 | x36);
+ { uint32_t x38 = (x4 | x37);
+ { uint32_t x39 = (x2 | x38);
+ out[0] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e448m2e224m1/feadd.c b/src/Specific/montgomery32_2e448m2e224m1/feadd.c
index 5eb3e19d3..8308e25e9 100644
--- a/src/Specific/montgomery32_2e448m2e224m1/feadd.c
+++ b/src/Specific/montgomery32_2e448m2e224m1/feadd.c
@@ -1,78 +1,88 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31)
-{ uint32_t x57; uint8_t x58 = _addcarryx_u32(0x0, x5, x31, &x57);
-{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x7, x33, &x60);
-{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x9, x35, &x63);
-{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x11, x37, &x66);
-{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x13, x39, &x69);
-{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x15, x41, &x72);
-{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x17, x43, &x75);
-{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x19, x45, &x78);
-{ uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x21, x47, &x81);
-{ uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x23, x49, &x84);
-{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x25, x51, &x87);
-{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x27, x53, &x90);
-{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x29, x55, &x93);
-{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x28, x54, &x96);
-{ uint32_t x99; uint8_t x100 = _subborrow_u32(0x0, x57, 0xffffffff, &x99);
-{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x60, 0xffffffff, &x102);
-{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x63, 0xffffffff, &x105);
-{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x66, 0xffffffff, &x108);
-{ uint32_t x111; uint8_t x112 = _subborrow_u32(x109, x69, 0xffffffff, &x111);
-{ uint32_t x114; uint8_t x115 = _subborrow_u32(x112, x72, 0xffffffff, &x114);
-{ uint32_t x117; uint8_t x118 = _subborrow_u32(x115, x75, 0xffffffff, &x117);
-{ uint32_t x120; uint8_t x121 = _subborrow_u32(x118, x78, 0xfffffffe, &x120);
-{ uint32_t x123; uint8_t x124 = _subborrow_u32(x121, x81, 0xffffffff, &x123);
-{ uint32_t x126; uint8_t x127 = _subborrow_u32(x124, x84, 0xffffffff, &x126);
-{ uint32_t x129; uint8_t x130 = _subborrow_u32(x127, x87, 0xffffffff, &x129);
-{ uint32_t x132; uint8_t x133 = _subborrow_u32(x130, x90, 0xffffffff, &x132);
-{ uint32_t x135; uint8_t x136 = _subborrow_u32(x133, x93, 0xffffffff, &x135);
-{ uint32_t x138; uint8_t x139 = _subborrow_u32(x136, x96, 0xffffffff, &x138);
-{ uint32_t _; uint8_t x142 = _subborrow_u32(x139, x97, 0x0, &_);
-{ uint32_t x143 = cmovznz(x142, x138, x96);
-{ uint32_t x144 = cmovznz(x142, x135, x93);
-{ uint32_t x145 = cmovznz(x142, x132, x90);
-{ uint32_t x146 = cmovznz(x142, x129, x87);
-{ uint32_t x147 = cmovznz(x142, x126, x84);
-{ uint32_t x148 = cmovznz(x142, x123, x81);
-{ uint32_t x149 = cmovznz(x142, x120, x78);
-{ uint32_t x150 = cmovznz(x142, x117, x75);
-{ uint32_t x151 = cmovznz(x142, x114, x72);
-{ uint32_t x152 = cmovznz(x142, x111, x69);
-{ uint32_t x153 = cmovznz(x142, x108, x66);
-{ uint32_t x154 = cmovznz(x142, x105, x63);
-{ uint32_t x155 = cmovznz(x142, x102, x60);
-{ uint32_t x156 = cmovznz(x142, x99, x57);
-out[0] = x143;
-out[1] = x144;
-out[2] = x145;
-out[3] = x146;
-out[4] = x147;
-out[5] = x148;
-out[6] = x149;
-out[7] = x150;
-out[8] = x151;
-out[9] = x152;
-out[10] = x153;
-out[11] = x154;
-out[12] = x155;
-out[13] = x156;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[14];
+static void feadd(uint32_t out[14], const uint32_t in1[14], const uint32_t in2[14]) {
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x54 = in2[13];
+ { const uint32_t x55 = in2[12];
+ { const uint32_t x53 = in2[11];
+ { const uint32_t x51 = in2[10];
+ { const uint32_t x49 = in2[9];
+ { const uint32_t x47 = in2[8];
+ { const uint32_t x45 = in2[7];
+ { const uint32_t x43 = in2[6];
+ { const uint32_t x41 = in2[5];
+ { const uint32_t x39 = in2[4];
+ { const uint32_t x37 = in2[3];
+ { const uint32_t x35 = in2[2];
+ { const uint32_t x33 = in2[1];
+ { const uint32_t x31 = in2[0];
+ { uint32_t x57; uint8_t x58 = _addcarryx_u32(0x0, x5, x31, &x57);
+ { uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x7, x33, &x60);
+ { uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x9, x35, &x63);
+ { uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x11, x37, &x66);
+ { uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x13, x39, &x69);
+ { uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x15, x41, &x72);
+ { uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x17, x43, &x75);
+ { uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x19, x45, &x78);
+ { uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x21, x47, &x81);
+ { uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x23, x49, &x84);
+ { uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x25, x51, &x87);
+ { uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x27, x53, &x90);
+ { uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x29, x55, &x93);
+ { uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x28, x54, &x96);
+ { uint32_t x99; uint8_t x100 = _subborrow_u32(0x0, x57, 0xffffffff, &x99);
+ { uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x60, 0xffffffff, &x102);
+ { uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x63, 0xffffffff, &x105);
+ { uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x66, 0xffffffff, &x108);
+ { uint32_t x111; uint8_t x112 = _subborrow_u32(x109, x69, 0xffffffff, &x111);
+ { uint32_t x114; uint8_t x115 = _subborrow_u32(x112, x72, 0xffffffff, &x114);
+ { uint32_t x117; uint8_t x118 = _subborrow_u32(x115, x75, 0xffffffff, &x117);
+ { uint32_t x120; uint8_t x121 = _subborrow_u32(x118, x78, 0xfffffffe, &x120);
+ { uint32_t x123; uint8_t x124 = _subborrow_u32(x121, x81, 0xffffffff, &x123);
+ { uint32_t x126; uint8_t x127 = _subborrow_u32(x124, x84, 0xffffffff, &x126);
+ { uint32_t x129; uint8_t x130 = _subborrow_u32(x127, x87, 0xffffffff, &x129);
+ { uint32_t x132; uint8_t x133 = _subborrow_u32(x130, x90, 0xffffffff, &x132);
+ { uint32_t x135; uint8_t x136 = _subborrow_u32(x133, x93, 0xffffffff, &x135);
+ { uint32_t x138; uint8_t x139 = _subborrow_u32(x136, x96, 0xffffffff, &x138);
+ { uint32_t _; uint8_t x142 = _subborrow_u32(x139, x97, 0x0, &_);
+ { uint32_t x143 = cmovznz(x142, x138, x96);
+ { uint32_t x144 = cmovznz(x142, x135, x93);
+ { uint32_t x145 = cmovznz(x142, x132, x90);
+ { uint32_t x146 = cmovznz(x142, x129, x87);
+ { uint32_t x147 = cmovznz(x142, x126, x84);
+ { uint32_t x148 = cmovznz(x142, x123, x81);
+ { uint32_t x149 = cmovznz(x142, x120, x78);
+ { uint32_t x150 = cmovznz(x142, x117, x75);
+ { uint32_t x151 = cmovznz(x142, x114, x72);
+ { uint32_t x152 = cmovznz(x142, x111, x69);
+ { uint32_t x153 = cmovznz(x142, x108, x66);
+ { uint32_t x154 = cmovznz(x142, x105, x63);
+ { uint32_t x155 = cmovznz(x142, x102, x60);
+ { uint32_t x156 = cmovznz(x142, x99, x57);
+ out[0] = x156;
+ out[1] = x155;
+ out[2] = x154;
+ out[3] = x153;
+ out[4] = x152;
+ out[5] = x151;
+ out[6] = x150;
+ out[7] = x149;
+ out[8] = x148;
+ out[9] = x147;
+ out[10] = x146;
+ out[11] = x145;
+ out[12] = x144;
+ out[13] = x143;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e448m2e224m1/fenz.c b/src/Specific/montgomery32_2e448m2e224m1/fenz.c
index d631822fd..42fc27af3 100644
--- a/src/Specific/montgomery32_2e448m2e224m1/fenz.c
+++ b/src/Specific/montgomery32_2e448m2e224m1/fenz.c
@@ -1,35 +1,31 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x27 = (x26 | x25);
-{ uint32_t x28 = (x24 | x27);
-{ uint32_t x29 = (x22 | x28);
-{ uint32_t x30 = (x20 | x29);
-{ uint32_t x31 = (x18 | x30);
-{ uint32_t x32 = (x16 | x31);
-{ uint32_t x33 = (x14 | x32);
-{ uint32_t x34 = (x12 | x33);
-{ uint32_t x35 = (x10 | x34);
-{ uint32_t x36 = (x8 | x35);
-{ uint32_t x37 = (x6 | x36);
-{ uint32_t x38 = (x4 | x37);
-{ uint32_t x39 = (x2 | x38);
-out[0] = x39;
-}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[14]) {
+ { const uint32_t x25 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x27 = (x26 | x25);
+ { uint32_t x28 = (x24 | x27);
+ { uint32_t x29 = (x22 | x28);
+ { uint32_t x30 = (x20 | x29);
+ { uint32_t x31 = (x18 | x30);
+ { uint32_t x32 = (x16 | x31);
+ { uint32_t x33 = (x14 | x32);
+ { uint32_t x34 = (x12 | x33);
+ { uint32_t x35 = (x10 | x34);
+ { uint32_t x36 = (x8 | x35);
+ { uint32_t x37 = (x6 | x36);
+ { uint32_t x38 = (x4 | x37);
+ { uint32_t x39 = (x2 | x38);
+ out[0] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e450m2e225m1/feadd.c b/src/Specific/montgomery32_2e450m2e225m1/feadd.c
index 3290f356a..64f201c18 100644
--- a/src/Specific/montgomery32_2e450m2e225m1/feadd.c
+++ b/src/Specific/montgomery32_2e450m2e225m1/feadd.c
@@ -1,82 +1,94 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
-{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
-{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
-{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
-{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
-{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xfffffffd, &x127);
-{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
-{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
-{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
-{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
-{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
-{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
-{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0x3, &x148);
-{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
-{ uint32_t x153 = cmovznz(x152, x148, x103);
-{ uint32_t x154 = cmovznz(x152, x145, x100);
-{ uint32_t x155 = cmovznz(x152, x142, x97);
-{ uint32_t x156 = cmovznz(x152, x139, x94);
-{ uint32_t x157 = cmovznz(x152, x136, x91);
-{ uint32_t x158 = cmovznz(x152, x133, x88);
-{ uint32_t x159 = cmovznz(x152, x130, x85);
-{ uint32_t x160 = cmovznz(x152, x127, x82);
-{ uint32_t x161 = cmovznz(x152, x124, x79);
-{ uint32_t x162 = cmovznz(x152, x121, x76);
-{ uint32_t x163 = cmovznz(x152, x118, x73);
-{ uint32_t x164 = cmovznz(x152, x115, x70);
-{ uint32_t x165 = cmovznz(x152, x112, x67);
-{ uint32_t x166 = cmovznz(x152, x109, x64);
-{ uint32_t x167 = cmovznz(x152, x106, x61);
-out[0] = x153;
-out[1] = x154;
-out[2] = x155;
-out[3] = x156;
-out[4] = x157;
-out[5] = x158;
-out[6] = x159;
-out[7] = x160;
-out[8] = x161;
-out[9] = x162;
-out[10] = x163;
-out[11] = x164;
-out[12] = x165;
-out[13] = x166;
-out[14] = x167;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void feadd(uint32_t out[15], const uint32_t in1[15], const uint32_t in2[15]) {
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x58 = in2[14];
+ { const uint32_t x59 = in2[13];
+ { const uint32_t x57 = in2[12];
+ { const uint32_t x55 = in2[11];
+ { const uint32_t x53 = in2[10];
+ { const uint32_t x51 = in2[9];
+ { const uint32_t x49 = in2[8];
+ { const uint32_t x47 = in2[7];
+ { const uint32_t x45 = in2[6];
+ { const uint32_t x43 = in2[5];
+ { const uint32_t x41 = in2[4];
+ { const uint32_t x39 = in2[3];
+ { const uint32_t x37 = in2[2];
+ { const uint32_t x35 = in2[1];
+ { const uint32_t x33 = in2[0];
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+ { uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+ { uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+ { uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+ { uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+ { uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xfffffffd, &x127);
+ { uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+ { uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+ { uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+ { uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+ { uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+ { uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+ { uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0x3, &x148);
+ { uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
+ { uint32_t x153 = cmovznz(x152, x148, x103);
+ { uint32_t x154 = cmovznz(x152, x145, x100);
+ { uint32_t x155 = cmovznz(x152, x142, x97);
+ { uint32_t x156 = cmovznz(x152, x139, x94);
+ { uint32_t x157 = cmovznz(x152, x136, x91);
+ { uint32_t x158 = cmovznz(x152, x133, x88);
+ { uint32_t x159 = cmovznz(x152, x130, x85);
+ { uint32_t x160 = cmovznz(x152, x127, x82);
+ { uint32_t x161 = cmovznz(x152, x124, x79);
+ { uint32_t x162 = cmovznz(x152, x121, x76);
+ { uint32_t x163 = cmovznz(x152, x118, x73);
+ { uint32_t x164 = cmovznz(x152, x115, x70);
+ { uint32_t x165 = cmovznz(x152, x112, x67);
+ { uint32_t x166 = cmovznz(x152, x109, x64);
+ { uint32_t x167 = cmovznz(x152, x106, x61);
+ out[0] = x167;
+ out[1] = x166;
+ out[2] = x165;
+ out[3] = x164;
+ out[4] = x163;
+ out[5] = x162;
+ out[6] = x161;
+ out[7] = x160;
+ out[8] = x159;
+ out[9] = x158;
+ out[10] = x157;
+ out[11] = x156;
+ out[12] = x155;
+ out[13] = x154;
+ out[14] = x153;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e450m2e225m1/fenz.c b/src/Specific/montgomery32_2e450m2e225m1/fenz.c
index 76004e346..d66853caf 100644
--- a/src/Specific/montgomery32_2e450m2e225m1/fenz.c
+++ b/src/Specific/montgomery32_2e450m2e225m1/fenz.c
@@ -1,36 +1,33 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x29 = (x28 | x27);
-{ uint32_t x30 = (x26 | x29);
-{ uint32_t x31 = (x24 | x30);
-{ uint32_t x32 = (x22 | x31);
-{ uint32_t x33 = (x20 | x32);
-{ uint32_t x34 = (x18 | x33);
-{ uint32_t x35 = (x16 | x34);
-{ uint32_t x36 = (x14 | x35);
-{ uint32_t x37 = (x12 | x36);
-{ uint32_t x38 = (x10 | x37);
-{ uint32_t x39 = (x8 | x38);
-{ uint32_t x40 = (x6 | x39);
-{ uint32_t x41 = (x4 | x40);
-{ uint32_t x42 = (x2 | x41);
-out[0] = x42;
-}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[15]) {
+ { const uint32_t x27 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x29 = (x28 | x27);
+ { uint32_t x30 = (x26 | x29);
+ { uint32_t x31 = (x24 | x30);
+ { uint32_t x32 = (x22 | x31);
+ { uint32_t x33 = (x20 | x32);
+ { uint32_t x34 = (x18 | x33);
+ { uint32_t x35 = (x16 | x34);
+ { uint32_t x36 = (x14 | x35);
+ { uint32_t x37 = (x12 | x36);
+ { uint32_t x38 = (x10 | x37);
+ { uint32_t x39 = (x8 | x38);
+ { uint32_t x40 = (x6 | x39);
+ { uint32_t x41 = (x4 | x40);
+ { uint32_t x42 = (x2 | x41);
+ out[0] = x42;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e452m3/feadd.c b/src/Specific/montgomery32_2e452m3/feadd.c
index b156fe974..dfd707107 100644
--- a/src/Specific/montgomery32_2e452m3/feadd.c
+++ b/src/Specific/montgomery32_2e452m3/feadd.c
@@ -1,82 +1,94 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
-{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
-{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xfffffffd, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
-{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
-{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
-{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xffffffff, &x127);
-{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
-{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
-{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
-{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
-{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
-{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
-{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xf, &x148);
-{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
-{ uint32_t x153 = cmovznz(x152, x148, x103);
-{ uint32_t x154 = cmovznz(x152, x145, x100);
-{ uint32_t x155 = cmovznz(x152, x142, x97);
-{ uint32_t x156 = cmovznz(x152, x139, x94);
-{ uint32_t x157 = cmovznz(x152, x136, x91);
-{ uint32_t x158 = cmovznz(x152, x133, x88);
-{ uint32_t x159 = cmovznz(x152, x130, x85);
-{ uint32_t x160 = cmovznz(x152, x127, x82);
-{ uint32_t x161 = cmovznz(x152, x124, x79);
-{ uint32_t x162 = cmovznz(x152, x121, x76);
-{ uint32_t x163 = cmovznz(x152, x118, x73);
-{ uint32_t x164 = cmovznz(x152, x115, x70);
-{ uint32_t x165 = cmovznz(x152, x112, x67);
-{ uint32_t x166 = cmovznz(x152, x109, x64);
-{ uint32_t x167 = cmovznz(x152, x106, x61);
-out[0] = x153;
-out[1] = x154;
-out[2] = x155;
-out[3] = x156;
-out[4] = x157;
-out[5] = x158;
-out[6] = x159;
-out[7] = x160;
-out[8] = x161;
-out[9] = x162;
-out[10] = x163;
-out[11] = x164;
-out[12] = x165;
-out[13] = x166;
-out[14] = x167;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void feadd(uint32_t out[15], const uint32_t in1[15], const uint32_t in2[15]) {
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x58 = in2[14];
+ { const uint32_t x59 = in2[13];
+ { const uint32_t x57 = in2[12];
+ { const uint32_t x55 = in2[11];
+ { const uint32_t x53 = in2[10];
+ { const uint32_t x51 = in2[9];
+ { const uint32_t x49 = in2[8];
+ { const uint32_t x47 = in2[7];
+ { const uint32_t x45 = in2[6];
+ { const uint32_t x43 = in2[5];
+ { const uint32_t x41 = in2[4];
+ { const uint32_t x39 = in2[3];
+ { const uint32_t x37 = in2[2];
+ { const uint32_t x35 = in2[1];
+ { const uint32_t x33 = in2[0];
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+ { uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+ { uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xfffffffd, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+ { uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+ { uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+ { uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xffffffff, &x127);
+ { uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+ { uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+ { uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+ { uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+ { uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+ { uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+ { uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xf, &x148);
+ { uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
+ { uint32_t x153 = cmovznz(x152, x148, x103);
+ { uint32_t x154 = cmovznz(x152, x145, x100);
+ { uint32_t x155 = cmovznz(x152, x142, x97);
+ { uint32_t x156 = cmovznz(x152, x139, x94);
+ { uint32_t x157 = cmovznz(x152, x136, x91);
+ { uint32_t x158 = cmovznz(x152, x133, x88);
+ { uint32_t x159 = cmovznz(x152, x130, x85);
+ { uint32_t x160 = cmovznz(x152, x127, x82);
+ { uint32_t x161 = cmovznz(x152, x124, x79);
+ { uint32_t x162 = cmovznz(x152, x121, x76);
+ { uint32_t x163 = cmovznz(x152, x118, x73);
+ { uint32_t x164 = cmovznz(x152, x115, x70);
+ { uint32_t x165 = cmovznz(x152, x112, x67);
+ { uint32_t x166 = cmovznz(x152, x109, x64);
+ { uint32_t x167 = cmovznz(x152, x106, x61);
+ out[0] = x167;
+ out[1] = x166;
+ out[2] = x165;
+ out[3] = x164;
+ out[4] = x163;
+ out[5] = x162;
+ out[6] = x161;
+ out[7] = x160;
+ out[8] = x159;
+ out[9] = x158;
+ out[10] = x157;
+ out[11] = x156;
+ out[12] = x155;
+ out[13] = x154;
+ out[14] = x153;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e452m3/fenz.c b/src/Specific/montgomery32_2e452m3/fenz.c
index 76004e346..d66853caf 100644
--- a/src/Specific/montgomery32_2e452m3/fenz.c
+++ b/src/Specific/montgomery32_2e452m3/fenz.c
@@ -1,36 +1,33 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x29 = (x28 | x27);
-{ uint32_t x30 = (x26 | x29);
-{ uint32_t x31 = (x24 | x30);
-{ uint32_t x32 = (x22 | x31);
-{ uint32_t x33 = (x20 | x32);
-{ uint32_t x34 = (x18 | x33);
-{ uint32_t x35 = (x16 | x34);
-{ uint32_t x36 = (x14 | x35);
-{ uint32_t x37 = (x12 | x36);
-{ uint32_t x38 = (x10 | x37);
-{ uint32_t x39 = (x8 | x38);
-{ uint32_t x40 = (x6 | x39);
-{ uint32_t x41 = (x4 | x40);
-{ uint32_t x42 = (x2 | x41);
-out[0] = x42;
-}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[15]) {
+ { const uint32_t x27 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x29 = (x28 | x27);
+ { uint32_t x30 = (x26 | x29);
+ { uint32_t x31 = (x24 | x30);
+ { uint32_t x32 = (x22 | x31);
+ { uint32_t x33 = (x20 | x32);
+ { uint32_t x34 = (x18 | x33);
+ { uint32_t x35 = (x16 | x34);
+ { uint32_t x36 = (x14 | x35);
+ { uint32_t x37 = (x12 | x36);
+ { uint32_t x38 = (x10 | x37);
+ { uint32_t x39 = (x8 | x38);
+ { uint32_t x40 = (x6 | x39);
+ { uint32_t x41 = (x4 | x40);
+ { uint32_t x42 = (x2 | x41);
+ out[0] = x42;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e468m17/feadd.c b/src/Specific/montgomery32_2e468m17/feadd.c
index 2279360ae..2c479bbc3 100644
--- a/src/Specific/montgomery32_2e468m17/feadd.c
+++ b/src/Specific/montgomery32_2e468m17/feadd.c
@@ -1,82 +1,94 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
-{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
-{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffef, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
-{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
-{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
-{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xffffffff, &x127);
-{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
-{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
-{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
-{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
-{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
-{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
-{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xfffff, &x148);
-{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
-{ uint32_t x153 = cmovznz(x152, x148, x103);
-{ uint32_t x154 = cmovznz(x152, x145, x100);
-{ uint32_t x155 = cmovznz(x152, x142, x97);
-{ uint32_t x156 = cmovznz(x152, x139, x94);
-{ uint32_t x157 = cmovznz(x152, x136, x91);
-{ uint32_t x158 = cmovznz(x152, x133, x88);
-{ uint32_t x159 = cmovznz(x152, x130, x85);
-{ uint32_t x160 = cmovznz(x152, x127, x82);
-{ uint32_t x161 = cmovznz(x152, x124, x79);
-{ uint32_t x162 = cmovznz(x152, x121, x76);
-{ uint32_t x163 = cmovznz(x152, x118, x73);
-{ uint32_t x164 = cmovznz(x152, x115, x70);
-{ uint32_t x165 = cmovznz(x152, x112, x67);
-{ uint32_t x166 = cmovznz(x152, x109, x64);
-{ uint32_t x167 = cmovznz(x152, x106, x61);
-out[0] = x153;
-out[1] = x154;
-out[2] = x155;
-out[3] = x156;
-out[4] = x157;
-out[5] = x158;
-out[6] = x159;
-out[7] = x160;
-out[8] = x161;
-out[9] = x162;
-out[10] = x163;
-out[11] = x164;
-out[12] = x165;
-out[13] = x166;
-out[14] = x167;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void feadd(uint32_t out[15], const uint32_t in1[15], const uint32_t in2[15]) {
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x58 = in2[14];
+ { const uint32_t x59 = in2[13];
+ { const uint32_t x57 = in2[12];
+ { const uint32_t x55 = in2[11];
+ { const uint32_t x53 = in2[10];
+ { const uint32_t x51 = in2[9];
+ { const uint32_t x49 = in2[8];
+ { const uint32_t x47 = in2[7];
+ { const uint32_t x45 = in2[6];
+ { const uint32_t x43 = in2[5];
+ { const uint32_t x41 = in2[4];
+ { const uint32_t x39 = in2[3];
+ { const uint32_t x37 = in2[2];
+ { const uint32_t x35 = in2[1];
+ { const uint32_t x33 = in2[0];
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+ { uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+ { uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffef, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+ { uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+ { uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+ { uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xffffffff, &x127);
+ { uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+ { uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+ { uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+ { uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+ { uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+ { uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+ { uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xfffff, &x148);
+ { uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
+ { uint32_t x153 = cmovznz(x152, x148, x103);
+ { uint32_t x154 = cmovznz(x152, x145, x100);
+ { uint32_t x155 = cmovznz(x152, x142, x97);
+ { uint32_t x156 = cmovznz(x152, x139, x94);
+ { uint32_t x157 = cmovznz(x152, x136, x91);
+ { uint32_t x158 = cmovznz(x152, x133, x88);
+ { uint32_t x159 = cmovznz(x152, x130, x85);
+ { uint32_t x160 = cmovznz(x152, x127, x82);
+ { uint32_t x161 = cmovznz(x152, x124, x79);
+ { uint32_t x162 = cmovznz(x152, x121, x76);
+ { uint32_t x163 = cmovznz(x152, x118, x73);
+ { uint32_t x164 = cmovznz(x152, x115, x70);
+ { uint32_t x165 = cmovznz(x152, x112, x67);
+ { uint32_t x166 = cmovznz(x152, x109, x64);
+ { uint32_t x167 = cmovznz(x152, x106, x61);
+ out[0] = x167;
+ out[1] = x166;
+ out[2] = x165;
+ out[3] = x164;
+ out[4] = x163;
+ out[5] = x162;
+ out[6] = x161;
+ out[7] = x160;
+ out[8] = x159;
+ out[9] = x158;
+ out[10] = x157;
+ out[11] = x156;
+ out[12] = x155;
+ out[13] = x154;
+ out[14] = x153;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e468m17/fenz.c b/src/Specific/montgomery32_2e468m17/fenz.c
index 76004e346..d66853caf 100644
--- a/src/Specific/montgomery32_2e468m17/fenz.c
+++ b/src/Specific/montgomery32_2e468m17/fenz.c
@@ -1,36 +1,33 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x29 = (x28 | x27);
-{ uint32_t x30 = (x26 | x29);
-{ uint32_t x31 = (x24 | x30);
-{ uint32_t x32 = (x22 | x31);
-{ uint32_t x33 = (x20 | x32);
-{ uint32_t x34 = (x18 | x33);
-{ uint32_t x35 = (x16 | x34);
-{ uint32_t x36 = (x14 | x35);
-{ uint32_t x37 = (x12 | x36);
-{ uint32_t x38 = (x10 | x37);
-{ uint32_t x39 = (x8 | x38);
-{ uint32_t x40 = (x6 | x39);
-{ uint32_t x41 = (x4 | x40);
-{ uint32_t x42 = (x2 | x41);
-out[0] = x42;
-}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[15]) {
+ { const uint32_t x27 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x29 = (x28 | x27);
+ { uint32_t x30 = (x26 | x29);
+ { uint32_t x31 = (x24 | x30);
+ { uint32_t x32 = (x22 | x31);
+ { uint32_t x33 = (x20 | x32);
+ { uint32_t x34 = (x18 | x33);
+ { uint32_t x35 = (x16 | x34);
+ { uint32_t x36 = (x14 | x35);
+ { uint32_t x37 = (x12 | x36);
+ { uint32_t x38 = (x10 | x37);
+ { uint32_t x39 = (x8 | x38);
+ { uint32_t x40 = (x6 | x39);
+ { uint32_t x41 = (x4 | x40);
+ { uint32_t x42 = (x2 | x41);
+ out[0] = x42;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e480m2e240m1/feadd.c b/src/Specific/montgomery32_2e480m2e240m1/feadd.c
index e3a7b717d..8ac24dff0 100644
--- a/src/Specific/montgomery32_2e480m2e240m1/feadd.c
+++ b/src/Specific/montgomery32_2e480m2e240m1/feadd.c
@@ -1,82 +1,94 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
-{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
-{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
-{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
-{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
-{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
-{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
-{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
-{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
-{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
-{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
-{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
-{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
-{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
-{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
-{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
-{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffff, &x106);
-{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
-{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
-{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
-{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
-{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
-{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
-{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xfffeffff, &x127);
-{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
-{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
-{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
-{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
-{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
-{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
-{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xffffffff, &x148);
-{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
-{ uint32_t x153 = cmovznz(x152, x148, x103);
-{ uint32_t x154 = cmovznz(x152, x145, x100);
-{ uint32_t x155 = cmovznz(x152, x142, x97);
-{ uint32_t x156 = cmovznz(x152, x139, x94);
-{ uint32_t x157 = cmovznz(x152, x136, x91);
-{ uint32_t x158 = cmovznz(x152, x133, x88);
-{ uint32_t x159 = cmovznz(x152, x130, x85);
-{ uint32_t x160 = cmovznz(x152, x127, x82);
-{ uint32_t x161 = cmovznz(x152, x124, x79);
-{ uint32_t x162 = cmovznz(x152, x121, x76);
-{ uint32_t x163 = cmovznz(x152, x118, x73);
-{ uint32_t x164 = cmovznz(x152, x115, x70);
-{ uint32_t x165 = cmovznz(x152, x112, x67);
-{ uint32_t x166 = cmovznz(x152, x109, x64);
-{ uint32_t x167 = cmovznz(x152, x106, x61);
-out[0] = x153;
-out[1] = x154;
-out[2] = x155;
-out[3] = x156;
-out[4] = x157;
-out[5] = x158;
-out[6] = x159;
-out[7] = x160;
-out[8] = x161;
-out[9] = x162;
-out[10] = x163;
-out[11] = x164;
-out[12] = x165;
-out[13] = x166;
-out[14] = x167;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void feadd(uint32_t out[15], const uint32_t in1[15], const uint32_t in2[15]) {
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x58 = in2[14];
+ { const uint32_t x59 = in2[13];
+ { const uint32_t x57 = in2[12];
+ { const uint32_t x55 = in2[11];
+ { const uint32_t x53 = in2[10];
+ { const uint32_t x51 = in2[9];
+ { const uint32_t x49 = in2[8];
+ { const uint32_t x47 = in2[7];
+ { const uint32_t x45 = in2[6];
+ { const uint32_t x43 = in2[5];
+ { const uint32_t x41 = in2[4];
+ { const uint32_t x39 = in2[3];
+ { const uint32_t x37 = in2[2];
+ { const uint32_t x35 = in2[1];
+ { const uint32_t x33 = in2[0];
+ { uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61);
+ { uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+ { uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+ { uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+ { uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+ { uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+ { uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+ { uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+ { uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+ { uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+ { uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+ { uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+ { uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+ { uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+ { uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103);
+ { uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffff, &x106);
+ { uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+ { uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+ { uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+ { uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+ { uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+ { uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+ { uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xfffeffff, &x127);
+ { uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+ { uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+ { uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+ { uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+ { uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+ { uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+ { uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xffffffff, &x148);
+ { uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_);
+ { uint32_t x153 = cmovznz(x152, x148, x103);
+ { uint32_t x154 = cmovznz(x152, x145, x100);
+ { uint32_t x155 = cmovznz(x152, x142, x97);
+ { uint32_t x156 = cmovznz(x152, x139, x94);
+ { uint32_t x157 = cmovznz(x152, x136, x91);
+ { uint32_t x158 = cmovznz(x152, x133, x88);
+ { uint32_t x159 = cmovznz(x152, x130, x85);
+ { uint32_t x160 = cmovznz(x152, x127, x82);
+ { uint32_t x161 = cmovznz(x152, x124, x79);
+ { uint32_t x162 = cmovznz(x152, x121, x76);
+ { uint32_t x163 = cmovznz(x152, x118, x73);
+ { uint32_t x164 = cmovznz(x152, x115, x70);
+ { uint32_t x165 = cmovznz(x152, x112, x67);
+ { uint32_t x166 = cmovznz(x152, x109, x64);
+ { uint32_t x167 = cmovznz(x152, x106, x61);
+ out[0] = x167;
+ out[1] = x166;
+ out[2] = x165;
+ out[3] = x164;
+ out[4] = x163;
+ out[5] = x162;
+ out[6] = x161;
+ out[7] = x160;
+ out[8] = x159;
+ out[9] = x158;
+ out[10] = x157;
+ out[11] = x156;
+ out[12] = x155;
+ out[13] = x154;
+ out[14] = x153;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e480m2e240m1/fenz.c b/src/Specific/montgomery32_2e480m2e240m1/fenz.c
index 76004e346..d66853caf 100644
--- a/src/Specific/montgomery32_2e480m2e240m1/fenz.c
+++ b/src/Specific/montgomery32_2e480m2e240m1/fenz.c
@@ -1,36 +1,33 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x29 = (x28 | x27);
-{ uint32_t x30 = (x26 | x29);
-{ uint32_t x31 = (x24 | x30);
-{ uint32_t x32 = (x22 | x31);
-{ uint32_t x33 = (x20 | x32);
-{ uint32_t x34 = (x18 | x33);
-{ uint32_t x35 = (x16 | x34);
-{ uint32_t x36 = (x14 | x35);
-{ uint32_t x37 = (x12 | x36);
-{ uint32_t x38 = (x10 | x37);
-{ uint32_t x39 = (x8 | x38);
-{ uint32_t x40 = (x6 | x39);
-{ uint32_t x41 = (x4 | x40);
-{ uint32_t x42 = (x2 | x41);
-out[0] = x42;
-}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[15]) {
+ { const uint32_t x27 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x29 = (x28 | x27);
+ { uint32_t x30 = (x26 | x29);
+ { uint32_t x31 = (x24 | x30);
+ { uint32_t x32 = (x22 | x31);
+ { uint32_t x33 = (x20 | x32);
+ { uint32_t x34 = (x18 | x33);
+ { uint32_t x35 = (x16 | x34);
+ { uint32_t x36 = (x14 | x35);
+ { uint32_t x37 = (x12 | x36);
+ { uint32_t x38 = (x10 | x37);
+ { uint32_t x39 = (x8 | x38);
+ { uint32_t x40 = (x6 | x39);
+ { uint32_t x41 = (x4 | x40);
+ { uint32_t x42 = (x2 | x41);
+ out[0] = x42;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e488m17/feadd.c b/src/Specific/montgomery32_2e488m17/feadd.c
index f13bcd9d5..73329fa32 100644
--- a/src/Specific/montgomery32_2e488m17/feadd.c
+++ b/src/Specific/montgomery32_2e488m17/feadd.c
@@ -1,86 +1,100 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
-{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffef, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
-{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
-{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
-{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
-{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
-{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
-{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
-{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
-{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
-{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
-{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xff, &x158);
-{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
-{ uint32_t x163 = cmovznz(x162, x158, x110);
-{ uint32_t x164 = cmovznz(x162, x155, x107);
-{ uint32_t x165 = cmovznz(x162, x152, x104);
-{ uint32_t x166 = cmovznz(x162, x149, x101);
-{ uint32_t x167 = cmovznz(x162, x146, x98);
-{ uint32_t x168 = cmovznz(x162, x143, x95);
-{ uint32_t x169 = cmovznz(x162, x140, x92);
-{ uint32_t x170 = cmovznz(x162, x137, x89);
-{ uint32_t x171 = cmovznz(x162, x134, x86);
-{ uint32_t x172 = cmovznz(x162, x131, x83);
-{ uint32_t x173 = cmovznz(x162, x128, x80);
-{ uint32_t x174 = cmovznz(x162, x125, x77);
-{ uint32_t x175 = cmovznz(x162, x122, x74);
-{ uint32_t x176 = cmovznz(x162, x119, x71);
-{ uint32_t x177 = cmovznz(x162, x116, x68);
-{ uint32_t x178 = cmovznz(x162, x113, x65);
-out[0] = x163;
-out[1] = x164;
-out[2] = x165;
-out[3] = x166;
-out[4] = x167;
-out[5] = x168;
-out[6] = x169;
-out[7] = x170;
-out[8] = x171;
-out[9] = x172;
-out[10] = x173;
-out[11] = x174;
-out[12] = x175;
-out[13] = x176;
-out[14] = x177;
-out[15] = x178;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffef, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xff, &x158);
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
+ { uint32_t x163 = cmovznz(x162, x158, x110);
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178;
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e488m17/fenz.c b/src/Specific/montgomery32_2e488m17/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e488m17/fenz.c
+++ b/src/Specific/montgomery32_2e488m17/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29);
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44);
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e489m21/feadd.c b/src/Specific/montgomery32_2e489m21/feadd.c
index 88920f35e..453317b36 100644
--- a/src/Specific/montgomery32_2e489m21/feadd.c
+++ b/src/Specific/montgomery32_2e489m21/feadd.c
@@ -1,86 +1,100 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
-{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffeb, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
-{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
-{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
-{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
-{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
-{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
-{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
-{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
-{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
-{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
-{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x1ff, &x158);
-{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
-{ uint32_t x163 = cmovznz(x162, x158, x110);
-{ uint32_t x164 = cmovznz(x162, x155, x107);
-{ uint32_t x165 = cmovznz(x162, x152, x104);
-{ uint32_t x166 = cmovznz(x162, x149, x101);
-{ uint32_t x167 = cmovznz(x162, x146, x98);
-{ uint32_t x168 = cmovznz(x162, x143, x95);
-{ uint32_t x169 = cmovznz(x162, x140, x92);
-{ uint32_t x170 = cmovznz(x162, x137, x89);
-{ uint32_t x171 = cmovznz(x162, x134, x86);
-{ uint32_t x172 = cmovznz(x162, x131, x83);
-{ uint32_t x173 = cmovznz(x162, x128, x80);
-{ uint32_t x174 = cmovznz(x162, x125, x77);
-{ uint32_t x175 = cmovznz(x162, x122, x74);
-{ uint32_t x176 = cmovznz(x162, x119, x71);
-{ uint32_t x177 = cmovznz(x162, x116, x68);
-{ uint32_t x178 = cmovznz(x162, x113, x65);
-out[0] = x163;
-out[1] = x164;
-out[2] = x165;
-out[3] = x166;
-out[4] = x167;
-out[5] = x168;
-out[6] = x169;
-out[7] = x170;
-out[8] = x171;
-out[9] = x172;
-out[10] = x173;
-out[11] = x174;
-out[12] = x175;
-out[13] = x176;
-out[14] = x177;
-out[15] = x178;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffeb, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x1ff, &x158);
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
+ { uint32_t x163 = cmovznz(x162, x158, x110);
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178;
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e489m21/fenz.c b/src/Specific/montgomery32_2e489m21/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e489m21/fenz.c
+++ b/src/Specific/montgomery32_2e489m21/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(uint32_t out[1], const uint32_t in1[16]) { /* Writes the bitwise OR of all 16 words of in1 to out[0]; the result is 0 exactly when every input word is 0. */
+ { const uint32_t x29 = in1[15]; /* copy the 16 input words into locals */
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29); /* OR-accumulate, folding in one further word per line */
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44); /* x45 now covers all 16 words */
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e495m31/feadd.c b/src/Specific/montgomery32_2e495m31/feadd.c
index ebc163343..61105f800 100644
--- a/src/Specific/montgomery32_2e495m31/feadd.c
+++ b/src/Specific/montgomery32_2e495m31/feadd.c
@@ -1,86 +1,100 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
-{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffe1, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
-{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
-{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
-{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
-{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
-{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
-{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
-{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
-{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
-{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
-{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fff, &x158);
-{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
-{ uint32_t x163 = cmovznz(x162, x158, x110);
-{ uint32_t x164 = cmovznz(x162, x155, x107);
-{ uint32_t x165 = cmovznz(x162, x152, x104);
-{ uint32_t x166 = cmovznz(x162, x149, x101);
-{ uint32_t x167 = cmovznz(x162, x146, x98);
-{ uint32_t x168 = cmovznz(x162, x143, x95);
-{ uint32_t x169 = cmovznz(x162, x140, x92);
-{ uint32_t x170 = cmovznz(x162, x137, x89);
-{ uint32_t x171 = cmovznz(x162, x134, x86);
-{ uint32_t x172 = cmovznz(x162, x131, x83);
-{ uint32_t x173 = cmovznz(x162, x128, x80);
-{ uint32_t x174 = cmovznz(x162, x125, x77);
-{ uint32_t x175 = cmovznz(x162, x122, x74);
-{ uint32_t x176 = cmovznz(x162, x119, x71);
-{ uint32_t x177 = cmovznz(x162, x116, x68);
-{ uint32_t x178 = cmovznz(x162, x113, x65);
-out[0] = x163;
-out[1] = x164;
-out[2] = x165;
-out[3] = x166;
-out[4] = x167;
-out[5] = x168;
-out[6] = x169;
-out[7] = x170;
-out[8] = x171;
-out[9] = x172;
-out[10] = x173;
-out[11] = x174;
-out[12] = x175;
-out[13] = x176;
-out[14] = x177;
-out[15] = x178;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) { /* Adds in1 and in2 as 16-word integers (word 0 least significant), also computes that sum minus the constant 2^495 - 31, and stores, word by word, whichever of the two cmovznz selects from the final borrow. */
+ { const uint32_t x32 = in1[15]; /* copy the 16 words of in1 into locals */
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15]; /* copy the 16 words of in2 into locals */
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); /* 16-step addition, word 0 first with carry-in 0; each step's returned carry feeds the next */
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); /* x111 is the carry returned for the top word */
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffe1, &x113); /* 16-step subtraction of the constant words 0xffffffe1, 0xffffffff (x14), 0x7fff, which spell 2^495 - 31; each step's returned borrow feeds the next */
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fff, &x158); /* top word of the constant */
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); /* extend the borrow through the carry x111; the difference word is discarded, only the borrow x162 is kept */
+ { uint32_t x163 = cmovznz(x162, x158, x110); /* per word: pick between the difference and the plain sum, keyed on x162; presumably cmovznz(c, a, b) yields a when c == 0 and b otherwise - confirm against its definition */
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178; /* stored least significant word first: x178 belongs to word 0, x163 to word 15 */
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e495m31/fenz.c b/src/Specific/montgomery32_2e495m31/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e495m31/fenz.c
+++ b/src/Specific/montgomery32_2e495m31/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(uint32_t out[1], const uint32_t in1[16]) { /* Writes the bitwise OR of all 16 words of in1 to out[0]; the result is 0 exactly when every input word is 0. */
+ { const uint32_t x29 = in1[15]; /* copy the 16 input words into locals */
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29); /* OR-accumulate, folding in one further word per line */
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44); /* x45 now covers all 16 words */
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e510m290x2e496m1/feadd.c b/src/Specific/montgomery32_2e510m290x2e496m1/feadd.c
new file mode 100644
index 000000000..8f1f62b09
--- /dev/null
+++ b/src/Specific/montgomery32_2e510m290x2e496m1/feadd.c
@@ -0,0 +1,100 @@
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) { /* Adds in1 and in2 as 16-word integers (word 0 least significant), also computes that sum minus the constant 2^510 - 290*2^496 - 1, and stores, word by word, whichever of the two cmovznz selects from the final borrow. */
+ { const uint32_t x32 = in1[15]; /* copy the 16 words of in1 into locals */
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15]; /* copy the 16 words of in2 into locals */
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); /* 16-step addition, word 0 first with carry-in 0; each step's returned carry feeds the next */
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); /* x111 is the carry returned for the top word */
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffff, &x113); /* 16-step subtraction of the constant words 0xffffffff (x15) then 0x3eddffff, which spell 2^510 - 290*2^496 - 1; each step's returned borrow feeds the next */
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x3eddffff, &x158); /* top word of the constant (1054736383 in decimal) */
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); /* extend the borrow through the carry x111; the difference word is discarded, only the borrow x162 is kept */
+ { uint32_t x163 = cmovznz(x162, x158, x110); /* per word: pick between the difference and the plain sum, keyed on x162; presumably cmovznz(c, a, b) yields a when c == 0 and b otherwise - confirm against its definition */
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178; /* stored least significant word first: x178 belongs to word 0, x163 to word 15 */
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e510m290x2e496m1/feaddDisplay.log b/src/Specific/montgomery32_2e510m290x2e496m1/feaddDisplay.log
new file mode 100644
index 000000000..3a28d9b8c
--- /dev/null
+++ b/src/Specific/montgomery32_2e510m290x2e496m1/feaddDisplay.log
@@ -0,0 +1,56 @@
+λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
+ uint32_t x65, uint8_t x66 = addcarryx_u32(0x0, x5, x35);
+ uint32_t x68, uint8_t x69 = addcarryx_u32(x66, x7, x37);
+ uint32_t x71, uint8_t x72 = addcarryx_u32(x69, x9, x39);
+ uint32_t x74, uint8_t x75 = addcarryx_u32(x72, x11, x41);
+ uint32_t x77, uint8_t x78 = addcarryx_u32(x75, x13, x43);
+ uint32_t x80, uint8_t x81 = addcarryx_u32(x78, x15, x45);
+ uint32_t x83, uint8_t x84 = addcarryx_u32(x81, x17, x47);
+ uint32_t x86, uint8_t x87 = addcarryx_u32(x84, x19, x49);
+ uint32_t x89, uint8_t x90 = addcarryx_u32(x87, x21, x51);
+ uint32_t x92, uint8_t x93 = addcarryx_u32(x90, x23, x53);
+ uint32_t x95, uint8_t x96 = addcarryx_u32(x93, x25, x55);
+ uint32_t x98, uint8_t x99 = addcarryx_u32(x96, x27, x57);
+ uint32_t x101, uint8_t x102 = addcarryx_u32(x99, x29, x59);
+ uint32_t x104, uint8_t x105 = addcarryx_u32(x102, x31, x61);
+ uint32_t x107, uint8_t x108 = addcarryx_u32(x105, x33, x63);
+ uint32_t x110, uint8_t x111 = addcarryx_u32(x108, x32, x62);
+ uint32_t x113, uint8_t x114 = subborrow_u32(0x0, x65, 0xffffffff);
+ uint32_t x116, uint8_t x117 = subborrow_u32(x114, x68, 0xffffffff);
+ uint32_t x119, uint8_t x120 = subborrow_u32(x117, x71, 0xffffffff);
+ uint32_t x122, uint8_t x123 = subborrow_u32(x120, x74, 0xffffffff);
+ uint32_t x125, uint8_t x126 = subborrow_u32(x123, x77, 0xffffffff);
+ uint32_t x128, uint8_t x129 = subborrow_u32(x126, x80, 0xffffffff);
+ uint32_t x131, uint8_t x132 = subborrow_u32(x129, x83, 0xffffffff);
+ uint32_t x134, uint8_t x135 = subborrow_u32(x132, x86, 0xffffffff);
+ uint32_t x137, uint8_t x138 = subborrow_u32(x135, x89, 0xffffffff);
+ uint32_t x140, uint8_t x141 = subborrow_u32(x138, x92, 0xffffffff);
+ uint32_t x143, uint8_t x144 = subborrow_u32(x141, x95, 0xffffffff);
+ uint32_t x146, uint8_t x147 = subborrow_u32(x144, x98, 0xffffffff);
+ uint32_t x149, uint8_t x150 = subborrow_u32(x147, x101, 0xffffffff);
+ uint32_t x152, uint8_t x153 = subborrow_u32(x150, x104, 0xffffffff);
+ uint32_t x155, uint8_t x156 = subborrow_u32(x153, x107, 0xffffffff);
+ uint32_t x158, uint8_t x159 = subborrow_u32(x156, x110, Const 1054736383);
+ uint32_t _, uint8_t x162 = subborrow_u32(x159, x111, 0x0);
+ uint32_t x163 = cmovznz(x162, x158, x110);
+ uint32_t x164 = cmovznz(x162, x155, x107);
+ uint32_t x165 = cmovznz(x162, x152, x104);
+ uint32_t x166 = cmovznz(x162, x149, x101);
+ uint32_t x167 = cmovznz(x162, x146, x98);
+ uint32_t x168 = cmovznz(x162, x143, x95);
+ uint32_t x169 = cmovznz(x162, x140, x92);
+ uint32_t x170 = cmovznz(x162, x137, x89);
+ uint32_t x171 = cmovznz(x162, x134, x86);
+ uint32_t x172 = cmovznz(x162, x131, x83);
+ uint32_t x173 = cmovznz(x162, x128, x80);
+ uint32_t x174 = cmovznz(x162, x125, x77);
+ uint32_t x175 = cmovznz(x162, x122, x74);
+ uint32_t x176 = cmovznz(x162, x119, x71);
+ uint32_t x177 = cmovznz(x162, x116, x68);
+ uint32_t x178 = cmovznz(x162, x113, x65);
+ return (x163, x164, x165, x166, x167, x168, x169, x170, x171, x172, x173, x174, x175, x176, x177, x178))
+(x, x0)%core
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c b/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c
+++ b/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(uint32_t out[1], const uint32_t in1[16]) { /* Writes the bitwise OR of all 16 words of in1 to out[0]; the result is 0 exactly when every input word is 0. */
+ { const uint32_t x29 = in1[15]; /* copy the 16 input words into locals */
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29); /* OR-accumulate, folding in one further word per line */
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44); /* x45 now covers all 16 words */
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e511m187/feadd.c b/src/Specific/montgomery32_2e511m187/feadd.c
index 9dd18938f..c564ec668 100644
--- a/src/Specific/montgomery32_2e511m187/feadd.c
+++ b/src/Specific/montgomery32_2e511m187/feadd.c
@@ -1,86 +1,100 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
-{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffff45, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
-{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
-{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
-{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
-{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
-{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
-{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
-{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
-{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
-{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
-{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fffffff, &x158);
-{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
-{ uint32_t x163 = cmovznz(x162, x158, x110);
-{ uint32_t x164 = cmovznz(x162, x155, x107);
-{ uint32_t x165 = cmovznz(x162, x152, x104);
-{ uint32_t x166 = cmovznz(x162, x149, x101);
-{ uint32_t x167 = cmovznz(x162, x146, x98);
-{ uint32_t x168 = cmovznz(x162, x143, x95);
-{ uint32_t x169 = cmovznz(x162, x140, x92);
-{ uint32_t x170 = cmovznz(x162, x137, x89);
-{ uint32_t x171 = cmovznz(x162, x134, x86);
-{ uint32_t x172 = cmovznz(x162, x131, x83);
-{ uint32_t x173 = cmovznz(x162, x128, x80);
-{ uint32_t x174 = cmovznz(x162, x125, x77);
-{ uint32_t x175 = cmovznz(x162, x122, x74);
-{ uint32_t x176 = cmovznz(x162, x119, x71);
-{ uint32_t x177 = cmovznz(x162, x116, x68);
-{ uint32_t x178 = cmovznz(x162, x113, x65);
-out[0] = x163;
-out[1] = x164;
-out[2] = x165;
-out[3] = x166;
-out[4] = x167;
-out[5] = x168;
-out[6] = x169;
-out[7] = x170;
-out[8] = x171;
-out[9] = x172;
-out[10] = x173;
-out[11] = x174;
-out[12] = x175;
-out[13] = x176;
-out[14] = x177;
-out[15] = x178;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffff45, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fffffff, &x158);
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
+ { uint32_t x163 = cmovznz(x162, x158, x110);
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178;
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e511m187/fenz.c b/src/Specific/montgomery32_2e511m187/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e511m187/fenz.c
+++ b/src/Specific/montgomery32_2e511m187/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29);
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44);
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e511m481/feadd.c b/src/Specific/montgomery32_2e511m481/feadd.c
index 42b332afd..a15361b34 100644
--- a/src/Specific/montgomery32_2e511m481/feadd.c
+++ b/src/Specific/montgomery32_2e511m481/feadd.c
@@ -1,86 +1,100 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
-{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xfffffe1f, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
-{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
-{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
-{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
-{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
-{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
-{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
-{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
-{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
-{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
-{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fffffff, &x158);
-{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
-{ uint32_t x163 = cmovznz(x162, x158, x110);
-{ uint32_t x164 = cmovznz(x162, x155, x107);
-{ uint32_t x165 = cmovznz(x162, x152, x104);
-{ uint32_t x166 = cmovznz(x162, x149, x101);
-{ uint32_t x167 = cmovznz(x162, x146, x98);
-{ uint32_t x168 = cmovznz(x162, x143, x95);
-{ uint32_t x169 = cmovznz(x162, x140, x92);
-{ uint32_t x170 = cmovznz(x162, x137, x89);
-{ uint32_t x171 = cmovznz(x162, x134, x86);
-{ uint32_t x172 = cmovznz(x162, x131, x83);
-{ uint32_t x173 = cmovznz(x162, x128, x80);
-{ uint32_t x174 = cmovznz(x162, x125, x77);
-{ uint32_t x175 = cmovznz(x162, x122, x74);
-{ uint32_t x176 = cmovznz(x162, x119, x71);
-{ uint32_t x177 = cmovznz(x162, x116, x68);
-{ uint32_t x178 = cmovznz(x162, x113, x65);
-out[0] = x163;
-out[1] = x164;
-out[2] = x165;
-out[3] = x166;
-out[4] = x167;
-out[5] = x168;
-out[6] = x169;
-out[7] = x170;
-out[8] = x171;
-out[9] = x172;
-out[10] = x173;
-out[11] = x174;
-out[12] = x175;
-out[13] = x176;
-out[14] = x177;
-out[15] = x178;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xfffffe1f, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fffffff, &x158);
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
+ { uint32_t x163 = cmovznz(x162, x158, x110);
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178;
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e511m481/fenz.c b/src/Specific/montgomery32_2e511m481/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e511m481/fenz.c
+++ b/src/Specific/montgomery32_2e511m481/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29);
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44);
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e512m491x2e496m1/feadd.c b/src/Specific/montgomery32_2e512m491x2e496m1/feadd.c
new file mode 100644
index 000000000..bf23193ff
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m491x2e496m1/feadd.c
@@ -0,0 +1,100 @@
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffff, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xfe14ffff, &x158);
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
+ { uint32_t x163 = cmovznz(x162, x158, x110);
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178;
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e512m491x2e496m1/feaddDisplay.log b/src/Specific/montgomery32_2e512m491x2e496m1/feaddDisplay.log
index a5f817d5b..9b0e914e5 100644
--- a/src/Specific/montgomery32_2e512m491x2e496m1/feaddDisplay.log
+++ b/src/Specific/montgomery32_2e512m491x2e496m1/feaddDisplay.log
@@ -33,7 +33,7 @@ Interp-η
uint32_t x149, uint8_t x150 = subborrow_u32(x147, x101, 0xffffffff);
uint32_t x152, uint8_t x153 = subborrow_u32(x150, x104, 0xffffffff);
uint32_t x155, uint8_t x156 = subborrow_u32(x153, x107, 0xffffffff);
- uint32_t x158, uint8_t x159 = subborrow_u32(x156, x110, Const 4262789119);
+ uint32_t x158, uint8_t x159 = subborrow_u32(x156, x110, 0xfe14ffff);
uint32_t _, uint8_t x162 = subborrow_u32(x159, x111, 0x0);
uint32_t x163 = cmovznz(x162, x158, x110);
uint32_t x164 = cmovznz(x162, x155, x107);
diff --git a/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c b/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c
+++ b/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29);
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44);
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e512m569/feadd.c b/src/Specific/montgomery32_2e512m569/feadd.c
index 261722f4c..2fa2de575 100644
--- a/src/Specific/montgomery32_2e512m569/feadd.c
+++ b/src/Specific/montgomery32_2e512m569/feadd.c
@@ -1,86 +1,100 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
-{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
-{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
-{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
-{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
-{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
-{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
-{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
-{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
-{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
-{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
-{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
-{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
-{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
-{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
-{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
-{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xfffffdc7, &x113);
-{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
-{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
-{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
-{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
-{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
-{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
-{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
-{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
-{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
-{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
-{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
-{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
-{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
-{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
-{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xffffffff, &x158);
-{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
-{ uint32_t x163 = cmovznz(x162, x158, x110);
-{ uint32_t x164 = cmovznz(x162, x155, x107);
-{ uint32_t x165 = cmovznz(x162, x152, x104);
-{ uint32_t x166 = cmovznz(x162, x149, x101);
-{ uint32_t x167 = cmovznz(x162, x146, x98);
-{ uint32_t x168 = cmovznz(x162, x143, x95);
-{ uint32_t x169 = cmovznz(x162, x140, x92);
-{ uint32_t x170 = cmovznz(x162, x137, x89);
-{ uint32_t x171 = cmovznz(x162, x134, x86);
-{ uint32_t x172 = cmovznz(x162, x131, x83);
-{ uint32_t x173 = cmovznz(x162, x128, x80);
-{ uint32_t x174 = cmovznz(x162, x125, x77);
-{ uint32_t x175 = cmovznz(x162, x122, x74);
-{ uint32_t x176 = cmovznz(x162, x119, x71);
-{ uint32_t x177 = cmovznz(x162, x116, x68);
-{ uint32_t x178 = cmovznz(x162, x113, x65);
-out[0] = x163;
-out[1] = x164;
-out[2] = x165;
-out[3] = x166;
-out[4] = x167;
-out[5] = x168;
-out[6] = x169;
-out[7] = x170;
-out[8] = x171;
-out[9] = x172;
-out[10] = x173;
-out[11] = x174;
-out[12] = x175;
-out[13] = x176;
-out[14] = x177;
-out[15] = x178;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void feadd(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65);
+ { uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+ { uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+ { uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+ { uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+ { uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+ { uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+ { uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+ { uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+ { uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+ { uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+ { uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+ { uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+ { uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+ { uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+ { uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110);
+ { uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xfffffdc7, &x113);
+ { uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+ { uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+ { uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+ { uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+ { uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+ { uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+ { uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+ { uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+ { uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+ { uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+ { uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+ { uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+ { uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+ { uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+ { uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xffffffff, &x158);
+ { uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_);
+ { uint32_t x163 = cmovznz(x162, x158, x110);
+ { uint32_t x164 = cmovznz(x162, x155, x107);
+ { uint32_t x165 = cmovznz(x162, x152, x104);
+ { uint32_t x166 = cmovznz(x162, x149, x101);
+ { uint32_t x167 = cmovznz(x162, x146, x98);
+ { uint32_t x168 = cmovznz(x162, x143, x95);
+ { uint32_t x169 = cmovznz(x162, x140, x92);
+ { uint32_t x170 = cmovznz(x162, x137, x89);
+ { uint32_t x171 = cmovznz(x162, x134, x86);
+ { uint32_t x172 = cmovznz(x162, x131, x83);
+ { uint32_t x173 = cmovznz(x162, x128, x80);
+ { uint32_t x174 = cmovznz(x162, x125, x77);
+ { uint32_t x175 = cmovznz(x162, x122, x74);
+ { uint32_t x176 = cmovznz(x162, x119, x71);
+ { uint32_t x177 = cmovznz(x162, x116, x68);
+ { uint32_t x178 = cmovznz(x162, x113, x65);
+ out[0] = x178;
+ out[1] = x177;
+ out[2] = x176;
+ out[3] = x175;
+ out[4] = x174;
+ out[5] = x173;
+ out[6] = x172;
+ out[7] = x171;
+ out[8] = x170;
+ out[9] = x169;
+ out[10] = x168;
+ out[11] = x167;
+ out[12] = x166;
+ out[13] = x165;
+ out[14] = x164;
+ out[15] = x163;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e512m569/fenz.c b/src/Specific/montgomery32_2e512m569/fenz.c
index 9290e38c5..5c5d21b0e 100644
--- a/src/Specific/montgomery32_2e512m569/fenz.c
+++ b/src/Specific/montgomery32_2e512m569/fenz.c
@@ -1,37 +1,35 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x31 = (x30 | x29);
-{ uint32_t x32 = (x28 | x31);
-{ uint32_t x33 = (x26 | x32);
-{ uint32_t x34 = (x24 | x33);
-{ uint32_t x35 = (x22 | x34);
-{ uint32_t x36 = (x20 | x35);
-{ uint32_t x37 = (x18 | x36);
-{ uint32_t x38 = (x16 | x37);
-{ uint32_t x39 = (x14 | x38);
-{ uint32_t x40 = (x12 | x39);
-{ uint32_t x41 = (x10 | x40);
-{ uint32_t x42 = (x8 | x41);
-{ uint32_t x43 = (x6 | x42);
-{ uint32_t x44 = (x4 | x43);
-{ uint32_t x45 = (x2 | x44);
-out[0] = x45;
-}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x31 = (x30 | x29);
+ { uint32_t x32 = (x28 | x31);
+ { uint32_t x33 = (x26 | x32);
+ { uint32_t x34 = (x24 | x33);
+ { uint32_t x35 = (x22 | x34);
+ { uint32_t x36 = (x20 | x35);
+ { uint32_t x37 = (x18 | x36);
+ { uint32_t x38 = (x16 | x37);
+ { uint32_t x39 = (x14 | x38);
+ { uint32_t x40 = (x12 | x39);
+ { uint32_t x41 = (x10 | x40);
+ { uint32_t x42 = (x8 | x41);
+ { uint32_t x43 = (x6 | x42);
+ { uint32_t x44 = (x4 | x43);
+ { uint32_t x45 = (x2 | x44);
+ out[0] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery32_2e521m1/fenz.c b/src/Specific/montgomery32_2e521m1/fenz.c
index 50ef7bf8b..fdbe458c3 100644
--- a/src/Specific/montgomery32_2e521m1/fenz.c
+++ b/src/Specific/montgomery32_2e521m1/fenz.c
@@ -1,38 +1,37 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x31, uint64_t x32, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint32_t x33 = (x32 | x31);
-{ uint32_t x34 = (x30 | x33);
-{ uint32_t x35 = (x28 | x34);
-{ uint32_t x36 = (x26 | x35);
-{ uint32_t x37 = (x24 | x36);
-{ uint32_t x38 = (x22 | x37);
-{ uint32_t x39 = (x20 | x38);
-{ uint32_t x40 = (x18 | x39);
-{ uint32_t x41 = (x16 | x40);
-{ uint32_t x42 = (x14 | x41);
-{ uint32_t x43 = (x12 | x42);
-{ uint32_t x44 = (x10 | x43);
-{ uint32_t x45 = (x8 | x44);
-{ uint32_t x46 = (x6 | x45);
-{ uint32_t x47 = (x4 | x46);
-{ uint32_t x48 = (x2 | x47);
-out[0] = x48;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint32_t out[1], const uint32_t in1[17]) {
+ { const uint32_t x31 = in1[16];
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x33 = (x32 | x31);
+ { uint32_t x34 = (x30 | x33);
+ { uint32_t x35 = (x28 | x34);
+ { uint32_t x36 = (x26 | x35);
+ { uint32_t x37 = (x24 | x36);
+ { uint32_t x38 = (x22 | x37);
+ { uint32_t x39 = (x20 | x38);
+ { uint32_t x40 = (x18 | x39);
+ { uint32_t x41 = (x16 | x40);
+ { uint32_t x42 = (x14 | x41);
+ { uint32_t x43 = (x12 | x42);
+ { uint32_t x44 = (x10 | x43);
+ { uint32_t x45 = (x8 | x44);
+ { uint32_t x46 = (x6 | x45);
+ { uint32_t x47 = (x4 | x46);
+ { uint32_t x48 = (x2 | x47);
+ out[0] = x48;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e127m1/feadd.c b/src/Specific/montgomery64_2e127m1/feadd.c
index f2b1d1573..10648472f 100644
--- a/src/Specific/montgomery64_2e127m1/feadd.c
+++ b/src/Specific/montgomery64_2e127m1/feadd.c
@@ -1,30 +1,16 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
-{ uint64_t x9; uint8_t x10 = _addcarryx_u64(0x0, x5, x7, &x9);
-{ uint64_t x12; uint8_t x13 = _addcarryx_u64(x10, x4, x6, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(0x0, x9, 0xffffffffffffffffL, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, x12, 0x7fffffffffffffffL, &x18);
-{ uint64_t _; uint8_t x22 = _subborrow_u64(x19, x13, 0x0, &_);
-{ uint64_t x23 = cmovznz(x22, x18, x12);
-{ uint64_t x24 = cmovznz(x22, x15, x9);
-out[0] = x23;
-out[1] = x24;
-}}}}}}}
-// caller: uint64_t out[2];
+static void feadd(uint64_t out[2], const uint64_t in1[2], const uint64_t in2[2]) {
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x6 = in2[1];
+ { const uint64_t x7 = in2[0];
+ { uint64_t x9; uint8_t x10 = _addcarryx_u64(0x0, x5, x7, &x9);
+ { uint64_t x12; uint8_t x13 = _addcarryx_u64(x10, x4, x6, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(0x0, x9, 0xffffffffffffffffL, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, x12, 0x7fffffffffffffffL, &x18);
+ { uint64_t _; uint8_t x22 = _subborrow_u64(x19, x13, 0x0, &_);
+ { uint64_t x23 = cmovznz(x22, x18, x12);
+ { uint64_t x24 = cmovznz(x22, x15, x9);
+ out[0] = x24;
+ out[1] = x23;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e127m1/femul.c b/src/Specific/montgomery64_2e127m1/femul.c
index 3f5813e56..121f8837c 100644
--- a/src/Specific/montgomery64_2e127m1/femul.c
+++ b/src/Specific/montgomery64_2e127m1/femul.c
@@ -1,54 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
-{ uint64_t x10; uint64_t x9 = _mulx_u64(x5, x7, &x10);
-{ uint64_t x13; uint64_t x12 = _mulx_u64(x5, x6, &x13);
-{ uint64_t x15; uint8_t x16 = _addcarryx_u64(0x0, x10, x12, &x15);
-{ uint64_t x18; uint8_t _ = _addcarryx_u64(0x0, x16, x13, &x18);
-{ uint64_t x22; uint64_t x21 = _mulx_u64(x9, 0xffffffffffffffffL, &x22);
-{ uint64_t x25; uint64_t x24 = _mulx_u64(x9, 0x7fffffffffffffffL, &x25);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x22, x24, &x27);
-{ uint64_t x30; uint8_t _ = _addcarryx_u64(0x0, x28, x25, &x30);
-{ uint64_t _; uint8_t x34 = _addcarryx_u64(0x0, x9, x21, &_);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x15, x27, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x18, x30, &x39);
-{ uint64_t x43; uint64_t x42 = _mulx_u64(x4, x7, &x43);
-{ uint64_t x46; uint64_t x45 = _mulx_u64(x4, x6, &x46);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(0x0, x43, x45, &x48);
-{ uint64_t x51; uint8_t _ = _addcarryx_u64(0x0, x49, x46, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(0x0, x36, x42, &x54);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x55, x39, x48, &x57);
-{ uint64_t x60; uint8_t x61 = _addcarryx_u64(x58, x40, x51, &x60);
-{ uint64_t x64; uint64_t x63 = _mulx_u64(x54, 0xffffffffffffffffL, &x64);
-{ uint64_t x67; uint64_t x66 = _mulx_u64(x54, 0x7fffffffffffffffL, &x67);
-{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x64, x66, &x69);
-{ uint64_t x72; uint8_t _ = _addcarryx_u64(0x0, x70, x67, &x72);
-{ uint64_t _; uint8_t x76 = _addcarryx_u64(0x0, x54, x63, &_);
-{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x57, x69, &x78);
-{ uint64_t x81; uint8_t x82 = _addcarryx_u64(x79, x60, x72, &x81);
-{ uint8_t x83 = (x82 + x61);
-{ uint64_t x85; uint8_t x86 = _subborrow_u64(0x0, x78, 0xffffffffffffffffL, &x85);
-{ uint64_t x88; uint8_t x89 = _subborrow_u64(x86, x81, 0x7fffffffffffffffL, &x88);
-{ uint64_t _; uint8_t x92 = _subborrow_u64(x89, x83, 0x0, &_);
-{ uint64_t x93 = cmovznz(x92, x88, x81);
-{ uint64_t x94 = cmovznz(x92, x85, x78);
-out[0] = x93;
-out[1] = x94;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[2];
+static void femul(uint64_t out[2], const uint64_t in1[2], const uint64_t in2[2]) {
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x6 = in2[1];
+ { const uint64_t x7 = in2[0];
+ { uint64_t x10; uint64_t x9 = _mulx_u64(x5, x7, &x10);
+ { uint64_t x13; uint64_t x12 = _mulx_u64(x5, x6, &x13);
+ { uint64_t x15; uint8_t x16 = _addcarryx_u64(0x0, x10, x12, &x15);
+ { uint64_t x18; uint8_t _ = _addcarryx_u64(0x0, x16, x13, &x18);
+ { uint64_t x22; uint64_t x21 = _mulx_u64(x9, 0xffffffffffffffffL, &x22);
+ { uint64_t x25; uint64_t x24 = _mulx_u64(x9, 0x7fffffffffffffffL, &x25);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x22, x24, &x27);
+ { uint64_t x30; uint8_t _ = _addcarryx_u64(0x0, x28, x25, &x30);
+ { uint64_t _; uint8_t x34 = _addcarryx_u64(0x0, x9, x21, &_);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x15, x27, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x18, x30, &x39);
+ { uint64_t x43; uint64_t x42 = _mulx_u64(x4, x7, &x43);
+ { uint64_t x46; uint64_t x45 = _mulx_u64(x4, x6, &x46);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(0x0, x43, x45, &x48);
+ { uint64_t x51; uint8_t _ = _addcarryx_u64(0x0, x49, x46, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(0x0, x36, x42, &x54);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x55, x39, x48, &x57);
+ { uint64_t x60; uint8_t x61 = _addcarryx_u64(x58, x40, x51, &x60);
+ { uint64_t x64; uint64_t x63 = _mulx_u64(x54, 0xffffffffffffffffL, &x64);
+ { uint64_t x67; uint64_t x66 = _mulx_u64(x54, 0x7fffffffffffffffL, &x67);
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x64, x66, &x69);
+ { uint64_t x72; uint8_t _ = _addcarryx_u64(0x0, x70, x67, &x72);
+ { uint64_t _; uint8_t x76 = _addcarryx_u64(0x0, x54, x63, &_);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x57, x69, &x78);
+ { uint64_t x81; uint8_t x82 = _addcarryx_u64(x79, x60, x72, &x81);
+ { uint8_t x83 = (x82 + x61);
+ { uint64_t x85; uint8_t x86 = _subborrow_u64(0x0, x78, 0xffffffffffffffffL, &x85);
+ { uint64_t x88; uint8_t x89 = _subborrow_u64(x86, x81, 0x7fffffffffffffffL, &x88);
+ { uint64_t _; uint8_t x92 = _subborrow_u64(x89, x83, 0x0, &_);
+ { uint64_t x93 = cmovznz(x92, x88, x81);
+ { uint64_t x94 = cmovznz(x92, x85, x78);
+ out[0] = x94;
+ out[1] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e127m1/fenz.c b/src/Specific/montgomery64_2e127m1/fenz.c
index a5550fc28..3b61c77b8 100644
--- a/src/Specific/montgomery64_2e127m1/fenz.c
+++ b/src/Specific/montgomery64_2e127m1/fenz.c
@@ -1,23 +1,7 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x1, uint64_t x2)
-{ uint64_t x3 = (x2 | x1);
-out[0] = x3;
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[2]) {
+ { const uint64_t x1 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x3 = (x2 | x1);
+ out[0] = x3;
+ }}}
}
-// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e127m1/feopp.c b/src/Specific/montgomery64_2e127m1/feopp.c
index 94cb256e1..954c3c768 100644
--- a/src/Specific/montgomery64_2e127m1/feopp.c
+++ b/src/Specific/montgomery64_2e127m1/feopp.c
@@ -1,30 +1,14 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x1, uint64_t x2)
-{ uint64_t x4; uint8_t x5 = _subborrow_u64(0x0, 0x0, x2, &x4);
-{ uint64_t x7; uint8_t x8 = _subborrow_u64(x5, 0x0, x1, &x7);
-{ uint64_t x9 = (uint64_t)cmovznz(x8, 0x0, 0xffffffffffffffffL);
-{ uint64_t x10 = (x9 & 0xffffffffffffffffL);
-{ uint64_t x12; uint8_t x13 = _addcarryx_u64(0x0, x4, x10, &x12);
-{ uint64_t x14 = (x9 & 0x7fffffffffffffffL);
-{ uint64_t x16; uint8_t _ = _addcarryx_u64(x13, x7, x14, &x16);
-out[0] = x16;
-out[1] = x12;
-}}}}}}}
-// caller: uint64_t out[2];
+static void feopp(uint64_t out[2], const uint64_t in1[2]) {
+ { const uint64_t x1 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x4; uint8_t x5 = _subborrow_u64(0x0, 0x0, x2, &x4);
+ { uint64_t x7; uint8_t x8 = _subborrow_u64(x5, 0x0, x1, &x7);
+ { uint64_t x9 = (uint64_t)cmovznz(x8, 0x0, 0xffffffffffffffffL);
+ { uint64_t x10 = (x9 & 0xffffffffffffffffL);
+ { uint64_t x12; uint8_t x13 = _addcarryx_u64(0x0, x4, x10, &x12);
+ { uint64_t x14 = (x9 & 0x7fffffffffffffffL);
+ { uint64_t x16; uint8_t _ = _addcarryx_u64(x13, x7, x14, &x16);
+ out[0] = x12;
+ out[1] = x16;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e127m1/fesub.c b/src/Specific/montgomery64_2e127m1/fesub.c
index 10326337a..e3dedbcb8 100644
--- a/src/Specific/montgomery64_2e127m1/fesub.c
+++ b/src/Specific/montgomery64_2e127m1/fesub.c
@@ -1,30 +1,16 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(0x0, x5, x7, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, x4, x6, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x9, x15, &x17);
-{ uint64_t x19 = (x14 & 0x7fffffffffffffffL);
-{ uint64_t x21; uint8_t _ = _addcarryx_u64(x18, x12, x19, &x21);
-out[0] = x21;
-out[1] = x17;
-}}}}}}}
-// caller: uint64_t out[2];
+static void fesub(uint64_t out[2], const uint64_t in1[2], const uint64_t in2[2]) {
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x6 = in2[1];
+ { const uint64_t x7 = in2[0];
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(0x0, x5, x7, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, x4, x6, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x9, x15, &x17);
+ { uint64_t x19 = (x14 & 0x7fffffffffffffffL);
+ { uint64_t x21; uint8_t _ = _addcarryx_u64(x18, x12, x19, &x21);
+ out[0] = x17;
+ out[1] = x21;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e129m25/feadd.c b/src/Specific/montgomery64_2e129m25/feadd.c
index 0d34b8eec..6bcbff963 100644
--- a/src/Specific/montgomery64_2e129m25/feadd.c
+++ b/src/Specific/montgomery64_2e129m25/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e129m25/femul.c b/src/Specific/montgomery64_2e129m25/femul.c
index 43dc51d01..a99ad6719 100644
--- a/src/Specific/montgomery64_2e129m25/femul.c
+++ b/src/Specific/montgomery64_2e129m25/femul.c
@@ -1,86 +1,74 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(0x0, x35, x37, &x40);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x38, x31, &x43);
-{ uint64_t _; uint8_t x47 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x22, x40, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x25, x43, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x28, x44, &x55);
-{ uint64_t x59; uint64_t x58 = _mulx_u64(x7, x9, &x59);
-{ uint64_t x62; uint64_t x61 = _mulx_u64(x7, x11, &x62);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x10, &x65);
-{ uint64_t x67; uint8_t x68 = _addcarryx_u64(0x0, x59, x61, &x67);
-{ uint64_t x70; uint8_t x71 = _addcarryx_u64(x68, x62, x64, &x70);
-{ uint64_t x73; uint8_t _ = _addcarryx_u64(0x0, x71, x65, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(0x0, x49, x58, &x76);
-{ uint64_t x79; uint8_t x80 = _addcarryx_u64(x77, x52, x67, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x55, x70, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x56, x73, &x85);
-{ uint64_t _; uint64_t x88 = _mulx_u64(x76, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x92; uint64_t x91 = _mulx_u64(x88, 0xffffffffffffffe7L, &x92);
-{ uint64_t x95; uint64_t x94 = _mulx_u64(x88, 0xffffffffffffffffL, &x95);
-{ uint64_t x97; uint8_t x98 = _addcarryx_u64(0x0, x92, x94, &x97);
-{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x95, x88, &x100);
-{ uint64_t _; uint8_t x104 = _addcarryx_u64(0x0, x76, x91, &_);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x79, x97, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x82, x100, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x85, x101, &x112);
-{ uint8_t x114 = (x113 + x86);
-{ uint64_t x117; uint64_t x116 = _mulx_u64(x6, x9, &x117);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x6, x11, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x6, x10, &x123);
-{ uint64_t x125; uint8_t x126 = _addcarryx_u64(0x0, x117, x119, &x125);
-{ uint64_t x128; uint8_t x129 = _addcarryx_u64(x126, x120, x122, &x128);
-{ uint64_t x131; uint8_t _ = _addcarryx_u64(0x0, x129, x123, &x131);
-{ uint64_t x134; uint8_t x135 = _addcarryx_u64(0x0, x106, x116, &x134);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x109, x125, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x112, x128, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x114, x131, &x143);
-{ uint64_t _; uint64_t x146 = _mulx_u64(x134, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x150; uint64_t x149 = _mulx_u64(x146, 0xffffffffffffffe7L, &x150);
-{ uint64_t x153; uint64_t x152 = _mulx_u64(x146, 0xffffffffffffffffL, &x153);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(0x0, x150, x152, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x153, x146, &x158);
-{ uint64_t _; uint8_t x162 = _addcarryx_u64(0x0, x134, x149, &_);
-{ uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x137, x155, &x164);
-{ uint64_t x167; uint8_t x168 = _addcarryx_u64(x165, x140, x158, &x167);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(x168, x143, x159, &x170);
-{ uint8_t x172 = (x171 + x144);
-{ uint64_t x174; uint8_t x175 = _subborrow_u64(0x0, x164, 0xffffffffffffffe7L, &x174);
-{ uint64_t x177; uint8_t x178 = _subborrow_u64(x175, x167, 0xffffffffffffffffL, &x177);
-{ uint64_t x180; uint8_t x181 = _subborrow_u64(x178, x170, 0x1, &x180);
-{ uint64_t _; uint8_t x184 = _subborrow_u64(x181, x172, 0x0, &_);
-{ uint64_t x185 = cmovznz(x184, x180, x170);
-{ uint64_t x186 = cmovznz(x184, x177, x167);
-{ uint64_t x187 = cmovznz(x184, x174, x164);
-out[0] = x185;
-out[1] = x186;
-out[2] = x187;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Montgomery-style product of in1 and in2 for the 3-limb modulus 2^129 - 25: one multiply-and-reduce round per limb of in1, then a trial subtraction.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 (= in1[0]) times each limb of in2; low half is returned, high half stored through the pointer
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // stitch the partial products into the 4-limb value x13, x22, x25, x28
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top limb = carry x26 + last high half; the carry-out lands in the throwaway _
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_); // x31 = low 64 bits of x13 * (inverse of 25 modulo 2^64); high half discarded
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35); // x31 times modulus limb 0 (2^64 - 25)
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 times modulus limb 1
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(0x0, x35, x37, &x40);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x38, x31, &x43); // modulus limb 2 is 1, so x31 itself is added instead of another product
+ { uint64_t _; uint8_t x47 = _addcarryx_u64(0x0, x13, x34, &_); // add x31 * modulus to the running value; the limb-0 sum is 0 modulo 2^64 by choice of x31, so only its carry is kept
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x22, x40, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x25, x43, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x28, x44, &x55);
+ { uint64_t x59; uint64_t x58 = _mulx_u64(x7, x9, &x59); // round 2: same pattern with x7 (= in1[1])
+ { uint64_t x62; uint64_t x61 = _mulx_u64(x7, x11, &x62);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x10, &x65);
+ { uint64_t x67; uint8_t x68 = _addcarryx_u64(0x0, x59, x61, &x67);
+ { uint64_t x70; uint8_t x71 = _addcarryx_u64(x68, x62, x64, &x70);
+ { uint64_t x73; uint8_t _ = _addcarryx_u64(0x0, x71, x65, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(0x0, x49, x58, &x76); // accumulate the new partial products onto the value left by round 1
+ { uint64_t x79; uint8_t x80 = _addcarryx_u64(x77, x52, x67, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x55, x70, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x56, x73, &x85);
+ { uint64_t _; uint64_t x88 = _mulx_u64(x76, 0x8f5c28f5c28f5c29L, &_); // reduction factor for round 2, derived from the new low limb x76
+ { uint64_t x92; uint64_t x91 = _mulx_u64(x88, 0xffffffffffffffe7L, &x92);
+ { uint64_t x95; uint64_t x94 = _mulx_u64(x88, 0xffffffffffffffffL, &x95);
+ { uint64_t x97; uint8_t x98 = _addcarryx_u64(0x0, x92, x94, &x97);
+ { uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x95, x88, &x100);
+ { uint64_t _; uint8_t x104 = _addcarryx_u64(0x0, x76, x91, &_);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x79, x97, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x82, x100, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x85, x101, &x112);
+ { uint8_t x114 = (x113 + x86); // merge the two top-limb carries of this round
+ { uint64_t x117; uint64_t x116 = _mulx_u64(x6, x9, &x117); // round 3: same pattern with x6 (= in1[2])
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x6, x11, &x120);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x6, x10, &x123);
+ { uint64_t x125; uint8_t x126 = _addcarryx_u64(0x0, x117, x119, &x125);
+ { uint64_t x128; uint8_t x129 = _addcarryx_u64(x126, x120, x122, &x128);
+ { uint64_t x131; uint8_t _ = _addcarryx_u64(0x0, x129, x123, &x131);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(0x0, x106, x116, &x134);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x109, x125, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x112, x128, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x114, x131, &x143);
+ { uint64_t _; uint64_t x146 = _mulx_u64(x134, 0x8f5c28f5c28f5c29L, &_); // reduction factor for round 3
+ { uint64_t x150; uint64_t x149 = _mulx_u64(x146, 0xffffffffffffffe7L, &x150);
+ { uint64_t x153; uint64_t x152 = _mulx_u64(x146, 0xffffffffffffffffL, &x153);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(0x0, x150, x152, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x153, x146, &x158);
+ { uint64_t _; uint8_t x162 = _addcarryx_u64(0x0, x134, x149, &_);
+ { uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x137, x155, &x164);
+ { uint64_t x167; uint8_t x168 = _addcarryx_u64(x165, x140, x158, &x167);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(x168, x143, x159, &x170);
+ { uint8_t x172 = (x171 + x144);
+ { uint64_t x174; uint8_t x175 = _subborrow_u64(0x0, x164, 0xffffffffffffffe7L, &x174); // trial subtraction of 2^129 - 25 from x164, x167, x170 with x172 on top
+ { uint64_t x177; uint8_t x178 = _subborrow_u64(x175, x167, 0xffffffffffffffffL, &x177);
+ { uint64_t x180; uint8_t x181 = _subborrow_u64(x178, x170, 0x1, &x180);
+ { uint64_t _; uint8_t x184 = _subborrow_u64(x181, x172, 0x0, &_); // only the final borrow x184 is kept
+ { uint64_t x185 = cmovznz(x184, x180, x170); // cmovznz is defined outside this block; presumably yields the 2nd argument when x184 == 0 and the 3rd otherwise -- TODO confirm
+ { uint64_t x186 = cmovznz(x184, x177, x167);
+ { uint64_t x187 = cmovznz(x184, x174, x164);
+ out[0] = x187; // limb 0 (first in the carry chains) goes to out[0]
+ out[1] = x186;
+ out[2] = x185;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e129m25/fenz.c b/src/Specific/montgomery64_2e129m25/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e129m25/fenz.c
+++ b/src/Specific/montgomery64_2e129m25/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[3]) { // Writes the bitwise OR of the three limbs of in1 to out[0]; out[0] is nonzero exactly when some limb is nonzero.
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3); // OR of limbs 1 and 2
+ { uint64_t x6 = (x2 | x5); // fold in limb 0
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e129m25/feopp.c b/src/Specific/montgomery64_2e129m25/feopp.c
index 3d73f8511..1a4a5ed1c 100644
--- a/src/Specific/montgomery64_2e129m25/feopp.c
+++ b/src/Specific/montgomery64_2e129m25/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffe7L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint8_t x23 = ((uint8_t)x14 & 0x1);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) { // Computes 0 - in1 over three 64-bit limbs, then adds a cmovznz-derived mask ANDed with the limbs of 2^129 - 25; result in out[0..2].
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // 0 - limb 0, borrow-in 0
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // 0 - limb 1 with chained borrow
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // 0 - limb 2; x13 is the final borrow
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // cmovznz is defined outside this block; presumably gives 0 when x13 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x15 = (x14 & 0xffffffffffffffe7L); // masked modulus limb 0 (2^64 - 25)
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked modulus limb 1
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint8_t x23 = ((uint8_t)x14 & 0x1); // masked modulus limb 2 (value 1), kept in 8 bits
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top limb lands in the throwaway _
+ out[0] = x17; // limb 0 goes to out[0]
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e129m25/fesub.c b/src/Specific/montgomery64_2e129m25/fesub.c
index f91af8c17..f2613260e 100644
--- a/src/Specific/montgomery64_2e129m25/fesub.c
+++ b/src/Specific/montgomery64_2e129m25/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffe7L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint8_t x30 = ((uint8_t)x21 & 0x1);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Computes in1 - in2 over three 64-bit limbs, then adds a cmovznz-derived mask ANDed with the limbs of 2^129 - 25; result in out[0..2].
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // limb 0 difference, borrow-in 0
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // limb 1 difference with chained borrow
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // limb 2 difference; x20 is the final borrow
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // cmovznz is defined outside this block; presumably gives 0 when x20 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x22 = (x21 & 0xffffffffffffffe7L); // masked modulus limb 0 (2^64 - 25)
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked modulus limb 1
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint8_t x30 = ((uint8_t)x21 & 0x1); // masked modulus limb 2 (value 1), kept in 8 bits
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top limb lands in the throwaway _
+ out[0] = x24; // limb 0 goes to out[0]
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e130m5/feadd.c b/src/Specific/montgomery64_2e130m5/feadd.c
index 662a2bda8..2618455df 100644
--- a/src/Specific/montgomery64_2e130m5/feadd.c
+++ b/src/Specific/montgomery64_2e130m5/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Adds in1 and in2 as 3-limb values, runs a trial subtraction of 2^130 - 5, and writes the cmovznz-selected limbs to out[0..2].
+ { const uint64_t x6 = in1[2]; // operands are read as three 64-bit limbs; limb 0 enters the carry chain first
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // limb 0 sum, carry-in 0
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // limb 1 sum, carry chained from limb 0
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // limb 2 sum; x20 is the carry out of the top limb
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22); // trial subtraction: 0xff..fb, 0xff..ff, 0x3 are the limbs of 2^130 - 5
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // runs the top carry x20 through the borrow chain; the difference goes to the throwaway _, only borrow x32 is kept
+ { uint64_t x33 = cmovznz(x32, x28, x19); // cmovznz is defined outside this block; presumably yields the 2nd argument when x32 == 0 and the 3rd otherwise -- TODO confirm
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35; // limb 0 (first in the carry chain) goes to out[0]
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e130m5/femul.c b/src/Specific/montgomery64_2e130m5/femul.c
index 846d29d24..c54f23886 100644
--- a/src/Specific/montgomery64_2e130m5/femul.c
+++ b/src/Specific/montgomery64_2e130m5/femul.c
@@ -1,33 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-out[0] = uint64_t x40;
-out[1] = uint8_t x41 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x31;
-out[2] = 0x3;;
-}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Montgomery-style product of in1 and in2 for the 3-limb modulus 2^130 - 5: one multiply-and-reduce round per limb of in1, then a trial subtraction.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 (= in1[0]) times each limb of in2; low half is returned, high half stored through the pointer
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // stitch the partial products into the 4-limb value x13, x22, x25, x28
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top limb = carry x26 + last high half; the carry-out lands in the throwaway _
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_); // x31 = low 64 bits of x13 * (inverse of 5 modulo 2^64); high half discarded
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35); // x31 times modulus limb 0 (2^64 - 5)
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 times modulus limb 1
+ { uint64_t x41_hi; uint64_t x40 = _mulx_u64(x31, 0x3, &x41_hi); uint8_t x41 = (uint8_t)x41_hi; // x31 times modulus limb 2 (value 3); the high half is at most 2, so it is narrowed to 8 bits
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint8_t x48 = (x47 + x41); // top limb of x31 * modulus
+ { uint64_t _; uint8_t x51 = _addcarryx_u64(0x0, x13, x34, &_); // add x31 * modulus to the running value; the limb-0 sum is 0 modulo 2^64 by choice of x31, so only its carry is kept
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x51, x22, x43, &x53);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x25, x46, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x28, x48, &x59);
+ { uint64_t x63; uint64_t x62 = _mulx_u64(x7, x9, &x63); // round 2: same pattern with x7 (= in1[1])
+ { uint64_t x66; uint64_t x65 = _mulx_u64(x7, x11, &x66);
+ { uint64_t x69; uint64_t x68 = _mulx_u64(x7, x10, &x69);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(0x0, x63, x65, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x66, x68, &x74);
+ { uint64_t x77; uint8_t _ = _addcarryx_u64(0x0, x75, x69, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(0x0, x53, x62, &x80); // accumulate the new partial products onto the value left by round 1
+ { uint64_t x83; uint8_t x84 = _addcarryx_u64(x81, x56, x71, &x83);
+ { uint64_t x86; uint8_t x87 = _addcarryx_u64(x84, x59, x74, &x86);
+ { uint64_t x89; uint8_t x90 = _addcarryx_u64(x87, x60, x77, &x89);
+ { uint64_t _; uint64_t x92 = _mulx_u64(x80, 0xcccccccccccccccdL, &_); // reduction factor for round 2, derived from the new low limb x80
+ { uint64_t x96; uint64_t x95 = _mulx_u64(x92, 0xfffffffffffffffbL, &x96);
+ { uint64_t x99; uint64_t x98 = _mulx_u64(x92, 0xffffffffffffffffL, &x99);
+ { uint64_t x102_hi; uint64_t x101 = _mulx_u64(x92, 0x3, &x102_hi); uint8_t x102 = (uint8_t)x102_hi; // x92 times modulus limb 2; high half (at most 2) narrowed to 8 bits
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x96, x98, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x99, x101, &x107);
+ { uint8_t x109 = (x108 + x102);
+ { uint64_t _; uint8_t x112 = _addcarryx_u64(0x0, x80, x95, &_);
+ { uint64_t x114; uint8_t x115 = _addcarryx_u64(x112, x83, x104, &x114);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(x115, x86, x107, &x117);
+ { uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x89, x109, &x120);
+ { uint8_t x122 = (x121 + x90); // merge the two top-limb carries of this round
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x6, x9, &x125); // round 3: same pattern with x6 (= in1[2])
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x6, x11, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x6, x10, &x131);
+ { uint64_t x133; uint8_t x134 = _addcarryx_u64(0x0, x125, x127, &x133);
+ { uint64_t x136; uint8_t x137 = _addcarryx_u64(x134, x128, x130, &x136);
+ { uint64_t x139; uint8_t _ = _addcarryx_u64(0x0, x137, x131, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(0x0, x114, x124, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x117, x133, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x120, x136, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x122, x139, &x151);
+ { uint64_t _; uint64_t x154 = _mulx_u64(x142, 0xcccccccccccccccdL, &_); // reduction factor for round 3
+ { uint64_t x158; uint64_t x157 = _mulx_u64(x154, 0xfffffffffffffffbL, &x158);
+ { uint64_t x161; uint64_t x160 = _mulx_u64(x154, 0xffffffffffffffffL, &x161);
+ { uint64_t x164_hi; uint64_t x163 = _mulx_u64(x154, 0x3, &x164_hi); uint8_t x164 = (uint8_t)x164_hi; // x154 times modulus limb 2; high half (at most 2) narrowed to 8 bits
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(0x0, x158, x160, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x161, x163, &x169);
+ { uint8_t x171 = (x170 + x164);
+ { uint64_t _; uint8_t x174 = _addcarryx_u64(0x0, x142, x157, &_);
+ { uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x145, x166, &x176);
+ { uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x148, x169, &x179);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x151, x171, &x182);
+ { uint8_t x184 = (x183 + x152);
+ { uint64_t x186; uint8_t x187 = _subborrow_u64(0x0, x176, 0xfffffffffffffffbL, &x186); // trial subtraction of 2^130 - 5 from x176, x179, x182 with x184 on top
+ { uint64_t x189; uint8_t x190 = _subborrow_u64(x187, x179, 0xffffffffffffffffL, &x189);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(x190, x182, 0x3, &x192);
+ { uint64_t _; uint8_t x196 = _subborrow_u64(x193, x184, 0x0, &_); // only the final borrow x196 is kept
+ { uint64_t x197 = cmovznz(x196, x192, x182); // cmovznz is defined outside this block; presumably yields the 2nd argument when x196 == 0 and the 3rd otherwise -- TODO confirm
+ { uint64_t x198 = cmovznz(x196, x189, x179);
+ { uint64_t x199 = cmovznz(x196, x186, x176);
+ out[0] = x199; // limb 0 (first in the carry chains) goes to out[0]
+ out[1] = x198;
+ out[2] = x197;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e130m5/fenz.c b/src/Specific/montgomery64_2e130m5/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e130m5/fenz.c
+++ b/src/Specific/montgomery64_2e130m5/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[3]) { // Writes the bitwise OR of the three limbs of in1 to out[0]; out[0] is nonzero exactly when some limb is nonzero.
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3); // OR of limbs 1 and 2
+ { uint64_t x6 = (x2 | x5); // fold in limb 0
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e130m5/feopp.c b/src/Specific/montgomery64_2e130m5/feopp.c
index 1a52fabdb..a96271cd8 100644
--- a/src/Specific/montgomery64_2e130m5/feopp.c
+++ b/src/Specific/montgomery64_2e130m5/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffffbL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint8_t x23 = ((uint8_t)x14 & 0x3);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) { // Computes 0 - in1 over three 64-bit limbs, then adds a cmovznz-derived mask ANDed with the limbs of 2^130 - 5; result in out[0..2].
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // 0 - limb 0, borrow-in 0
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // 0 - limb 1 with chained borrow
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // 0 - limb 2; x13 is the final borrow
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // cmovznz is defined outside this block; presumably gives 0 when x13 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x15 = (x14 & 0xfffffffffffffffbL); // masked modulus limb 0 (2^64 - 5)
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked modulus limb 1
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint8_t x23 = ((uint8_t)x14 & 0x3); // masked modulus limb 2 (value 3), kept in 8 bits
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top limb lands in the throwaway _
+ out[0] = x17; // limb 0 goes to out[0]
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e130m5/fesub.c b/src/Specific/montgomery64_2e130m5/fesub.c
index 6443fdde3..f0577c5d6 100644
--- a/src/Specific/montgomery64_2e130m5/fesub.c
+++ b/src/Specific/montgomery64_2e130m5/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffffbL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint8_t x30 = ((uint8_t)x21 & 0x3);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Computes in1 - in2 over three 64-bit limbs, then adds a cmovznz-derived mask ANDed with the limbs of 2^130 - 5; result in out[0..2].
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // limb 0 difference, borrow-in 0
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // limb 1 difference with chained borrow
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // limb 2 difference; x20 is the final borrow
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // cmovznz is defined outside this block; presumably gives 0 when x20 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x22 = (x21 & 0xfffffffffffffffbL); // masked modulus limb 0 (2^64 - 5)
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked modulus limb 1
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint8_t x30 = ((uint8_t)x21 & 0x3); // masked modulus limb 2 (value 3), kept in 8 bits
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top limb lands in the throwaway _
+ out[0] = x24; // limb 0 goes to out[0]
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e137m13/feadd.c b/src/Specific/montgomery64_2e137m13/feadd.c
index 09c3c25a0..eaed15441 100644
--- a/src/Specific/montgomery64_2e137m13/feadd.c
+++ b/src/Specific/montgomery64_2e137m13/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff3L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1ff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff3L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1ff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e137m13/femul.c b/src/Specific/montgomery64_2e137m13/femul.c
index 4acf26b69..612f1f798 100644
--- a/src/Specific/montgomery64_2e137m13/femul.c
+++ b/src/Specific/montgomery64_2e137m13/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x4ec4ec4ec4ec4ec5, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff3L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1ff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x4ec4ec4ec4ec4ec5, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff3L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1ff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x4ec4ec4ec4ec4ec5, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff3L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1ff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff3L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1ff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x4ec4ec4ec4ec4ec5, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff3L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1ff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x4ec4ec4ec4ec4ec5, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff3L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1ff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x4ec4ec4ec4ec4ec5, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff3L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1ff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff3L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1ff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e137m13/fenz.c b/src/Specific/montgomery64_2e137m13/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e137m13/fenz.c
+++ b/src/Specific/montgomery64_2e137m13/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e137m13/feopp.c b/src/Specific/montgomery64_2e137m13/feopp.c
index 3639e9a08..b1e66f4a2 100644
--- a/src/Specific/montgomery64_2e137m13/feopp.c
+++ b/src/Specific/montgomery64_2e137m13/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffff3L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x1ff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffff3L);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x1ff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e137m13/fesub.c b/src/Specific/montgomery64_2e137m13/fesub.c
index d9b7e5783..36211f161 100644
--- a/src/Specific/montgomery64_2e137m13/fesub.c
+++ b/src/Specific/montgomery64_2e137m13/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffff3L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x1ff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xfffffffffffffff3L);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x1ff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e140m27/feadd.c b/src/Specific/montgomery64_2e140m27/feadd.c
index 492af78a0..47f29dfd8 100644
--- a/src/Specific/montgomery64_2e140m27/feadd.c
+++ b/src/Specific/montgomery64_2e140m27/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe5L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xfff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe5L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xfff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e140m27/femul.c b/src/Specific/montgomery64_2e140m27/femul.c
index 335361606..840d35b98 100644
--- a/src/Specific/montgomery64_2e140m27/femul.c
+++ b/src/Specific/montgomery64_2e140m27/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x84bda12f684bda13L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe5L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0xfff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x84bda12f684bda13L, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe5L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0xfff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x84bda12f684bda13L, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe5L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0xfff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe5L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0xfff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x84bda12f684bda13L, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe5L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0xfff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x84bda12f684bda13L, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe5L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0xfff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x84bda12f684bda13L, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe5L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0xfff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe5L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0xfff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e140m27/fenz.c b/src/Specific/montgomery64_2e140m27/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e140m27/fenz.c
+++ b/src/Specific/montgomery64_2e140m27/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e140m27/feopp.c b/src/Specific/montgomery64_2e140m27/feopp.c
index f9d52d171..5cd210957 100644
--- a/src/Specific/montgomery64_2e140m27/feopp.c
+++ b/src/Specific/montgomery64_2e140m27/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffe5L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0xfff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffe5L);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0xfff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e140m27/fesub.c b/src/Specific/montgomery64_2e140m27/fesub.c
index 2d144c7b4..b1fc13a20 100644
--- a/src/Specific/montgomery64_2e140m27/fesub.c
+++ b/src/Specific/montgomery64_2e140m27/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffe5L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0xfff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xffffffffffffffe5L);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0xfff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e141m9/feadd.c b/src/Specific/montgomery64_2e141m9/feadd.c
index 6d573b5b8..8a09050f2 100644
--- a/src/Specific/montgomery64_2e141m9/feadd.c
+++ b/src/Specific/montgomery64_2e141m9/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff7L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff7L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e141m9/femul.c b/src/Specific/montgomery64_2e141m9/femul.c
index b93beb68a..1ef81a49b 100644
--- a/src/Specific/montgomery64_2e141m9/femul.c
+++ b/src/Specific/montgomery64_2e141m9/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff7L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff7L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff7L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff7L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff7L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff7L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff7L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff7L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e141m9/fenz.c b/src/Specific/montgomery64_2e141m9/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e141m9/fenz.c
+++ b/src/Specific/montgomery64_2e141m9/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e141m9/feopp.c b/src/Specific/montgomery64_2e141m9/feopp.c
index 29462c1fd..03b985631 100644
--- a/src/Specific/montgomery64_2e141m9/feopp.c
+++ b/src/Specific/montgomery64_2e141m9/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffff7L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x1fff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffff7L);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x1fff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e141m9/fesub.c b/src/Specific/montgomery64_2e141m9/fesub.c
index b414e11a1..c37a6757d 100644
--- a/src/Specific/montgomery64_2e141m9/fesub.c
+++ b/src/Specific/montgomery64_2e141m9/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffff7L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x1fff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xfffffffffffffff7L);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x1fff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m3/feadd.c b/src/Specific/montgomery64_2e150m3/feadd.c
index 51b20f325..7e035475b 100644
--- a/src/Specific/montgomery64_2e150m3/feadd.c
+++ b/src/Specific/montgomery64_2e150m3/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffdL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffdL, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m3/femul.c b/src/Specific/montgomery64_2e150m3/femul.c
index 9dd6af2b7..bb3e619b1 100644
--- a/src/Specific/montgomery64_2e150m3/femul.c
+++ b/src/Specific/montgomery64_2e150m3/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffdL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffdL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffdL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffdL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffdL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffdL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffdL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffdL, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m3/fenz.c b/src/Specific/montgomery64_2e150m3/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e150m3/fenz.c
+++ b/src/Specific/montgomery64_2e150m3/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m3/feopp.c b/src/Specific/montgomery64_2e150m3/feopp.c
index a560642cb..71d501063 100644
--- a/src/Specific/montgomery64_2e150m3/feopp.c
+++ b/src/Specific/montgomery64_2e150m3/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffffdL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffffdL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m3/fesub.c b/src/Specific/montgomery64_2e150m3/fesub.c
index 843b2de7b..8a186256c 100644
--- a/src/Specific/montgomery64_2e150m3/fesub.c
+++ b/src/Specific/montgomery64_2e150m3/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffffdL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xfffffffffffffffdL);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m5/feadd.c b/src/Specific/montgomery64_2e150m5/feadd.c
index d8642ec59..946523de3 100644
--- a/src/Specific/montgomery64_2e150m5/feadd.c
+++ b/src/Specific/montgomery64_2e150m5/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m5/femul.c b/src/Specific/montgomery64_2e150m5/femul.c
index 3f8eed0bd..a8aa30740 100644
--- a/src/Specific/montgomery64_2e150m5/femul.c
+++ b/src/Specific/montgomery64_2e150m5/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m5/fenz.c b/src/Specific/montgomery64_2e150m5/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e150m5/fenz.c
+++ b/src/Specific/montgomery64_2e150m5/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m5/feopp.c b/src/Specific/montgomery64_2e150m5/feopp.c
index f00debb7e..7546be8c5 100644
--- a/src/Specific/montgomery64_2e150m5/feopp.c
+++ b/src/Specific/montgomery64_2e150m5/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffffbL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) { // Three-word negation: computes 0 - in1 with a borrow chain, then adds a masked three-word constant and writes the sum to out.
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // first link of the chain: 0 - in1[0]; assuming Intel intrinsic semantics, x6 is the difference and x7 the borrow-out -- confirm against this project's definition
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // 0 - in1[1], consuming borrow x7
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // 0 - in1[2]; x13 is the final borrow
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask of 0 or all-ones selected by x13; cmovznz is defined outside this view -- presumably all-ones when x13 is nonzero, confirm
+ { uint64_t x15 = (x14 & 0xfffffffffffffffbL); // the three masked constant words (0xfffffffffffffffb, 0xffffffffffffffff, 0x3fffff) read low-to-high equal 2^150 - 5
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry-out of the top word is discarded into `_`
+ out[0] = x17; // out[0] receives the first word of the add chain, matching in1[0] being the first word of the borrow chain
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e150m5/fesub.c b/src/Specific/montgomery64_2e150m5/fesub.c
index dccdfa918..2309ec81d 100644
--- a/src/Specific/montgomery64_2e150m5/fesub.c
+++ b/src/Specific/montgomery64_2e150m5/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffffbL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Three-word subtraction: computes in1 - in2 with a borrow chain, then adds a masked three-word constant and writes the sum to out.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // first link of the chain: in1[0] - in2[0]; assuming Intel intrinsic semantics, x13 is the difference and x14 the borrow-out -- confirm against this project's definition
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // in1[1] - in2[1], consuming borrow x14
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // in1[2] - in2[2]; x20 is the final borrow
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask of 0 or all-ones selected by x20; cmovznz is defined outside this view -- presumably all-ones when x20 is nonzero, confirm
+ { uint64_t x22 = (x21 & 0xfffffffffffffffbL); // the three masked constant words (0xfffffffffffffffb, 0xffffffffffffffff, 0x3fffff) read low-to-high equal 2^150 - 5
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry-out of the top word is discarded into `_`
+ out[0] = x24; // out[0] receives the first word of the add chain, matching in1[0]/in2[0] being the first words of the borrow chain
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e152m17/feadd.c b/src/Specific/montgomery64_2e152m17/feadd.c
index 417bc7169..bf1578506 100644
--- a/src/Specific/montgomery64_2e152m17/feadd.c
+++ b/src/Specific/montgomery64_2e152m17/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffefL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Three-word addition: computes in1 + in2 with a carry chain, trial-subtracts a three-word constant, then selects one of the two results per word.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // first link of the chain: in1[0] + in2[0]; assuming Intel intrinsic semantics, x13 is the sum and x14 the carry-out -- confirm against this project's definition
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 is the carry out of the top word
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffefL, &x22); // trial subtraction; the constant words (0xffffffffffffffef, 0xffffffffffffffff, 0xffffff) read low-to-high equal 2^152 - 17
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // extends the borrow chain through the carry x20; only the final borrow x32 is kept
+ { uint64_t x33 = cmovznz(x32, x28, x19); // per-word choice between the subtracted word and the plain sum, keyed on x32; cmovznz is defined outside this view -- presumably keeps the plain sum when x32 is nonzero, confirm
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35; // x35 derives from the first word of the chains, so out[] uses the same word order as in1[]/in2[]
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e152m17/femul.c b/src/Specific/montgomery64_2e152m17/femul.c
index 4c615812a..69b22d21f 100644
--- a/src/Specific/montgomery64_2e152m17/femul.c
+++ b/src/Specific/montgomery64_2e152m17/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffefL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0xffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffefL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0xffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffefL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0xffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffefL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0xffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Three-word multiply interleaved with word-wise reduction: one round per word of in1 (x5, x7, x6), then a trial subtraction and per-word select. NOTE(review): structure and directory name suggest Montgomery multiplication modulo 2^152 - 17 -- confirm.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 (in1[0]) times each word of in2; assuming Intel intrinsic semantics, the return value is the low half and the pointer receives the high half -- confirm against this project's definition
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // fold each high half into the next product's low half
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // carry x26 is added as an ordinary operand; the carry-out is discarded into `_`
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xf0f0f0f0f0f0f0f1L, &_); // only one half of this product is kept; note 17 * 0xf0f0f0f0f0f0f0f1 == 1 (mod 2^64)
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffefL, &x35); // x31 times the constant words (0xffffffffffffffef, 0xffffffffffffffff, 0xffffff), which read low-to-high equal 2^152 - 17
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0xffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // the sum word is discarded into `_`; only the carry x53 feeds the following additions
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 (in1[1]) times each word of in2
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // add the round-2 products to the running value (x55, x58, x61, x62) left by round 1
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xf0f0f0f0f0f0f0f1L, &_); // same reduction step as in round 1, now driven by x82
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffefL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0xffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // sum word discarded again; only the carry x116 is kept
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92); // merge the carry-outs of the two addition chains of this round
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 (in1[2]) times each word of in2
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // add the round-3 products to the running value (x118, x121, x124, x126)
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xf0f0f0f0f0f0f0f1L, &_); // same reduction step, now driven by x146
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffefL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0xffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156); // merged carry-outs act as a fourth word for the trial subtraction below
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffefL, &x192); // trial subtraction of the constant 2^152 - 17 from (x182, x185, x188, x190)
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0xffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // only the final borrow x202 is kept
+ { uint64_t x203 = cmovznz(x202, x198, x188); // per-word choice between subtracted and unsubtracted value, keyed on x202; cmovznz is defined outside this view -- presumably keeps the unsubtracted word when x202 is nonzero, confirm
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205; // x205 derives from the first word of the final chains, so out[] is written first-word-first
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e152m17/fenz.c b/src/Specific/montgomery64_2e152m17/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e152m17/fenz.c
+++ b/src/Specific/montgomery64_2e152m17/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) { // Stores the bitwise OR of the three words of in1 into out[0]. NOTE(review): `ReturnType` is not defined in the visible code -- confirm the including file supplies it as a macro.
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6; // zero exactly when in1[0], in1[1] and in1[2] are all zero
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e152m17/feopp.c b/src/Specific/montgomery64_2e152m17/feopp.c
index e543901b3..02f99f04d 100644
--- a/src/Specific/montgomery64_2e152m17/feopp.c
+++ b/src/Specific/montgomery64_2e152m17/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffefL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0xffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) { // Three-word negation: computes 0 - in1 with a borrow chain, then adds a masked three-word constant and writes the sum to out.
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // first link of the chain: 0 - in1[0]; assuming Intel intrinsic semantics, x6 is the difference and x7 the borrow-out -- confirm against this project's definition
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // 0 - in1[1], consuming borrow x7
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // 0 - in1[2]; x13 is the final borrow
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask of 0 or all-ones selected by x13; cmovznz is defined outside this view -- presumably all-ones when x13 is nonzero, confirm
+ { uint64_t x15 = (x14 & 0xffffffffffffffefL); // the three masked constant words (0xffffffffffffffef, 0xffffffffffffffff, 0xffffff) read low-to-high equal 2^152 - 17
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0xffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry-out of the top word is discarded into `_`
+ out[0] = x17; // out[0] receives the first word of the add chain, matching in1[0] being the first word of the borrow chain
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e152m17/fesub.c b/src/Specific/montgomery64_2e152m17/fesub.c
index e6906e865..c841d52c1 100644
--- a/src/Specific/montgomery64_2e152m17/fesub.c
+++ b/src/Specific/montgomery64_2e152m17/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffefL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0xffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Three-word subtraction: computes in1 - in2 with a borrow chain, then adds a masked three-word constant and writes the sum to out.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // first link of the chain: in1[0] - in2[0]; assuming Intel intrinsic semantics, x13 is the difference and x14 the borrow-out -- confirm against this project's definition
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // in1[1] - in2[1], consuming borrow x14
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // in1[2] - in2[2]; x20 is the final borrow
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask of 0 or all-ones selected by x20; cmovznz is defined outside this view -- presumably all-ones when x20 is nonzero, confirm
+ { uint64_t x22 = (x21 & 0xffffffffffffffefL); // the three masked constant words (0xffffffffffffffef, 0xffffffffffffffff, 0xffffff) read low-to-high equal 2^152 - 17
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0xffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry-out of the top word is discarded into `_`
+ out[0] = x24; // out[0] receives the first word of the add chain, matching in1[0]/in2[0] being the first words of the borrow chain
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e158m15/feadd.c b/src/Specific/montgomery64_2e158m15/feadd.c
index dfea544b4..96890e2c8 100644
--- a/src/Specific/montgomery64_2e158m15/feadd.c
+++ b/src/Specific/montgomery64_2e158m15/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff1L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Three-word addition: computes in1 + in2 with a carry chain, trial-subtracts a three-word constant, then selects one of the two results per word.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // first link of the chain: in1[0] + in2[0]; assuming Intel intrinsic semantics, x13 is the sum and x14 the carry-out -- confirm against this project's definition
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 is the carry out of the top word
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff1L, &x22); // trial subtraction; the constant words (0xfffffffffffffff1, 0xffffffffffffffff, 0x3fffffff) read low-to-high equal 2^158 - 15
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // extends the borrow chain through the carry x20; only the final borrow x32 is kept
+ { uint64_t x33 = cmovznz(x32, x28, x19); // per-word choice between the subtracted word and the plain sum, keyed on x32; cmovznz is defined outside this view -- presumably keeps the plain sum when x32 is nonzero, confirm
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35; // x35 derives from the first word of the chains, so out[] uses the same word order as in1[]/in2[]
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e158m15/femul.c b/src/Specific/montgomery64_2e158m15/femul.c
index 15e6f0071..d6315cc25 100644
--- a/src/Specific/montgomery64_2e158m15/femul.c
+++ b/src/Specific/montgomery64_2e158m15/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff1L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff1L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff1L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff1L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff1L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff1L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff1L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff1L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e158m15/fenz.c b/src/Specific/montgomery64_2e158m15/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e158m15/fenz.c
+++ b/src/Specific/montgomery64_2e158m15/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e158m15/feopp.c b/src/Specific/montgomery64_2e158m15/feopp.c
index ebcf46f1d..16324eab8 100644
--- a/src/Specific/montgomery64_2e158m15/feopp.c
+++ b/src/Specific/montgomery64_2e158m15/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffff1L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffff1L);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e158m15/fesub.c b/src/Specific/montgomery64_2e158m15/fesub.c
index 23dfa5006..227281c23 100644
--- a/src/Specific/montgomery64_2e158m15/fesub.c
+++ b/src/Specific/montgomery64_2e158m15/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffff1L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xfffffffffffffff1L);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e165m25/feadd.c b/src/Specific/montgomery64_2e165m25/feadd.c
index e84dc499d..ce0d47446 100644
--- a/src/Specific/montgomery64_2e165m25/feadd.c
+++ b/src/Specific/montgomery64_2e165m25/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e165m25/femul.c b/src/Specific/montgomery64_2e165m25/femul.c
index 44766da38..2b7ac00dc 100644
--- a/src/Specific/montgomery64_2e165m25/femul.c
+++ b/src/Specific/montgomery64_2e165m25/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe7L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe7L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe7L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe7L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe7L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe7L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e165m25/fenz.c b/src/Specific/montgomery64_2e165m25/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e165m25/fenz.c
+++ b/src/Specific/montgomery64_2e165m25/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e165m25/feopp.c b/src/Specific/montgomery64_2e165m25/feopp.c
index 86c44bbc9..0628a94cd 100644
--- a/src/Specific/montgomery64_2e165m25/feopp.c
+++ b/src/Specific/montgomery64_2e165m25/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffe7L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x1fffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffe7L);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x1fffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e165m25/fesub.c b/src/Specific/montgomery64_2e165m25/fesub.c
index a2b2ca75f..4484a9f3f 100644
--- a/src/Specific/montgomery64_2e165m25/fesub.c
+++ b/src/Specific/montgomery64_2e165m25/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffe7L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x1fffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xffffffffffffffe7L);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x1fffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e166m5/feadd.c b/src/Specific/montgomery64_2e166m5/feadd.c
index 9d3fdb43c..507af03c8 100644
--- a/src/Specific/montgomery64_2e166m5/feadd.c
+++ b/src/Specific/montgomery64_2e166m5/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e166m5/femul.c b/src/Specific/montgomery64_2e166m5/femul.c
index 276f293b4..c86f09815 100644
--- a/src/Specific/montgomery64_2e166m5/femul.c
+++ b/src/Specific/montgomery64_2e166m5/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e166m5/fenz.c b/src/Specific/montgomery64_2e166m5/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e166m5/fenz.c
+++ b/src/Specific/montgomery64_2e166m5/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e166m5/feopp.c b/src/Specific/montgomery64_2e166m5/feopp.c
index e44b1e56a..c20635533 100644
--- a/src/Specific/montgomery64_2e166m5/feopp.c
+++ b/src/Specific/montgomery64_2e166m5/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffffbL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffffbL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e166m5/fesub.c b/src/Specific/montgomery64_2e166m5/fesub.c
index 310529ada..73060c9a2 100644
--- a/src/Specific/montgomery64_2e166m5/fesub.c
+++ b/src/Specific/montgomery64_2e166m5/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffffbL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xfffffffffffffffbL);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e171m19/feadd.c b/src/Specific/montgomery64_2e171m19/feadd.c
index 2b2f07eaa..080d2395e 100644
--- a/src/Specific/montgomery64_2e171m19/feadd.c
+++ b/src/Specific/montgomery64_2e171m19/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffedL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x7ffffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffedL, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x7ffffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e171m19/femul.c b/src/Specific/montgomery64_2e171m19/femul.c
index 5c3bb308e..c6633c047 100644
--- a/src/Specific/montgomery64_2e171m19/femul.c
+++ b/src/Specific/montgomery64_2e171m19/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x86bca1af286bca1bL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffedL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x7ffffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x86bca1af286bca1bL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffedL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x7ffffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x86bca1af286bca1bL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffedL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x7ffffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffedL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x7ffffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x86bca1af286bca1bL, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffedL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x7ffffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x86bca1af286bca1bL, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffedL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x7ffffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x86bca1af286bca1bL, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffedL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x7ffffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffedL, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x7ffffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e171m19/fenz.c b/src/Specific/montgomery64_2e171m19/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e171m19/fenz.c
+++ b/src/Specific/montgomery64_2e171m19/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e171m19/feopp.c b/src/Specific/montgomery64_2e171m19/feopp.c
index 2f9d970ef..f6798e8ea 100644
--- a/src/Specific/montgomery64_2e171m19/feopp.c
+++ b/src/Specific/montgomery64_2e171m19/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffedL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x7ffffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffedL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x7ffffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e171m19/fesub.c b/src/Specific/montgomery64_2e171m19/fesub.c
index e767e7877..0ad3de872 100644
--- a/src/Specific/montgomery64_2e171m19/fesub.c
+++ b/src/Specific/montgomery64_2e171m19/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffedL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x7ffffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xffffffffffffffedL);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x7ffffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m17/feadd.c b/src/Specific/montgomery64_2e174m17/feadd.c
index f7638f962..77590f27c 100644
--- a/src/Specific/montgomery64_2e174m17/feadd.c
+++ b/src/Specific/montgomery64_2e174m17/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffefL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffefL, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m17/femul.c b/src/Specific/montgomery64_2e174m17/femul.c
index 11665587a..91d068ac8 100644
--- a/src/Specific/montgomery64_2e174m17/femul.c
+++ b/src/Specific/montgomery64_2e174m17/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffefL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffefL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffefL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffefL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffefL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffefL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffefL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffefL, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m17/fenz.c b/src/Specific/montgomery64_2e174m17/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e174m17/fenz.c
+++ b/src/Specific/montgomery64_2e174m17/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m17/feopp.c b/src/Specific/montgomery64_2e174m17/feopp.c
index fd6a9ef94..2e095e15d 100644
--- a/src/Specific/montgomery64_2e174m17/feopp.c
+++ b/src/Specific/montgomery64_2e174m17/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffefL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffefL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m17/fesub.c b/src/Specific/montgomery64_2e174m17/fesub.c
index 262cd52ed..fa05c1792 100644
--- a/src/Specific/montgomery64_2e174m17/fesub.c
+++ b/src/Specific/montgomery64_2e174m17/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffefL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xffffffffffffffefL);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m3/feadd.c b/src/Specific/montgomery64_2e174m3/feadd.c
index 29ed47397..af34d80c4 100644
--- a/src/Specific/montgomery64_2e174m3/feadd.c
+++ b/src/Specific/montgomery64_2e174m3/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffdL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffdL, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m3/femul.c b/src/Specific/montgomery64_2e174m3/femul.c
index 972ea2079..84481c785 100644
--- a/src/Specific/montgomery64_2e174m3/femul.c
+++ b/src/Specific/montgomery64_2e174m3/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffdL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffdL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffdL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffdL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffdL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffdL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffdL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffdL, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m3/fenz.c b/src/Specific/montgomery64_2e174m3/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e174m3/fenz.c
+++ b/src/Specific/montgomery64_2e174m3/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m3/feopp.c b/src/Specific/montgomery64_2e174m3/feopp.c
index 70ac07364..bc3c5bdf1 100644
--- a/src/Specific/montgomery64_2e174m3/feopp.c
+++ b/src/Specific/montgomery64_2e174m3/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffffdL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffffdL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e174m3/fesub.c b/src/Specific/montgomery64_2e174m3/fesub.c
index 32fe676c0..619fe8917 100644
--- a/src/Specific/montgomery64_2e174m3/fesub.c
+++ b/src/Specific/montgomery64_2e174m3/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffffdL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xfffffffffffffffdL);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e189m25/feadd.c b/src/Specific/montgomery64_2e189m25/feadd.c
index 436c948b4..349f4d564 100644
--- a/src/Specific/montgomery64_2e189m25/feadd.c
+++ b/src/Specific/montgomery64_2e189m25/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fffffffffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fffffffffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e189m25/femul.c b/src/Specific/montgomery64_2e189m25/femul.c
index 0f9466fef..a757e29a7 100644
--- a/src/Specific/montgomery64_2e189m25/femul.c
+++ b/src/Specific/montgomery64_2e189m25/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fffffffffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe7L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fffffffffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe7L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fffffffffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe7L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fffffffffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fffffffffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe7L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fffffffffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe7L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fffffffffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe7L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fffffffffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e189m25/fenz.c b/src/Specific/montgomery64_2e189m25/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e189m25/fenz.c
+++ b/src/Specific/montgomery64_2e189m25/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e189m25/feopp.c b/src/Specific/montgomery64_2e189m25/feopp.c
index 42a1bc53f..e30928d40 100644
--- a/src/Specific/montgomery64_2e189m25/feopp.c
+++ b/src/Specific/montgomery64_2e189m25/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffe7L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x1fffffffffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffe7L);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x1fffffffffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e189m25/fesub.c b/src/Specific/montgomery64_2e189m25/fesub.c
index 8274703eb..475de3c15 100644
--- a/src/Specific/montgomery64_2e189m25/fesub.c
+++ b/src/Specific/montgomery64_2e189m25/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffe7L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x1fffffffffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xffffffffffffffe7L);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x1fffffffffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e190m11/feadd.c b/src/Specific/montgomery64_2e190m11/feadd.c
index 34d22e878..f1b4f1922 100644
--- a/src/Specific/montgomery64_2e190m11/feadd.c
+++ b/src/Specific/montgomery64_2e190m11/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff5L, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffffffff, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff5L, &x22);
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffffffff, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
+ { uint64_t x33 = cmovznz(x32, x28, x19);
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35;
+ out[1] = x34;
+ out[2] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e190m11/femul.c b/src/Specific/montgomery64_2e190m11/femul.c
index 4be4d3a3c..824773dee 100644
--- a/src/Specific/montgomery64_2e190m11/femul.c
+++ b/src/Specific/montgomery64_2e190m11/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x2e8ba2e8ba2e8ba3, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff5L, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffffffff, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x2e8ba2e8ba2e8ba3, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff5L, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffffffff, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x2e8ba2e8ba2e8ba3, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff5L, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffffffff, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff5L, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffffffff, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x2e8ba2e8ba2e8ba3, &_);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff5L, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffffffff, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x2e8ba2e8ba2e8ba3, &_);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff5L, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffffffff, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x2e8ba2e8ba2e8ba3, &_);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff5L, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffffffff, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156);
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff5L, &x192);
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffffffff, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+ { uint64_t x203 = cmovznz(x202, x198, x188);
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205;
+ out[1] = x204;
+ out[2] = x203;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e190m11/fenz.c b/src/Specific/montgomery64_2e190m11/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e190m11/fenz.c
+++ b/src/Specific/montgomery64_2e190m11/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e190m11/feopp.c b/src/Specific/montgomery64_2e190m11/feopp.c
index 87b15542f..082a54eb9 100644
--- a/src/Specific/montgomery64_2e190m11/feopp.c
+++ b/src/Specific/montgomery64_2e190m11/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xfffffffffffffff5L);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x3fffffffffffffff);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) { // Computes 0 - in1 over three 64-bit limbs (in1[0] enters the borrow chain first), then adds a masked three-limb constant and writes the sum to out[0..2].
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // 0 - x2; borrow out in x7
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13 is the final borrow of the three-limb subtraction
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; presumably all-ones when x13 is nonzero and 0 otherwise - cmovznz is defined outside this view, confirm
+ { uint64_t x15 = (x14 & 0xfffffffffffffff5L); // constant limbs 0xfffffffffffffff5, 0xffffffffffffffff, 0x3fffffffffffffff equal 2^190 - 11 when the first is least significant
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x3fffffffffffffff);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the last limb is discarded
+ out[0] = x17; // limb produced first by the add-back chain
+ out[1] = x21;
+ out[2] = x25; // limb produced last by the add-back chain
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e190m11/fesub.c b/src/Specific/montgomery64_2e190m11/fesub.c
index 6199b9f1c..709d49d56 100644
--- a/src/Specific/montgomery64_2e190m11/fesub.c
+++ b/src/Specific/montgomery64_2e190m11/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xfffffffffffffff5L);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x3fffffffffffffff);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Computes in1 - in2 over three 64-bit limbs (index 0 enters the borrow chain first), then adds a masked three-limb constant and writes the sum to out[0..2].
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // in1[0] - in2[0]; borrow out in x14
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20 is the final borrow of the three-limb subtraction
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; presumably all-ones when x20 is nonzero and 0 otherwise - cmovznz is defined outside this view, confirm
+ { uint64_t x22 = (x21 & 0xfffffffffffffff5L); // constant limbs 0xfffffffffffffff5, 0xffffffffffffffff, 0x3fffffffffffffff equal 2^190 - 11 when the first is least significant
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x3fffffffffffffff);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the last limb is discarded
+ out[0] = x24; // limb produced first by the add-back chain
+ out[1] = x28;
+ out[2] = x32; // limb produced last by the add-back chain
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e191m19/feadd.c b/src/Specific/montgomery64_2e191m19/feadd.c
index ca789cbcc..273495250 100644
--- a/src/Specific/montgomery64_2e191m19/feadd.c
+++ b/src/Specific/montgomery64_2e191m19/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffedL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x7fffffffffffffffL, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Computes in1 + in2 over three 64-bit limbs, also computes that sum minus a fixed three-limb constant, and writes one of the two to out[0..2] depending on the final borrow.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // in1[0] + in2[0]; carry out in x14
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 is the carry out of the three-limb sum
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffedL, &x22); // constant limbs 0xffffffffffffffed, 0xffffffffffffffff, 0x7fffffffffffffff equal 2^191 - 19 when the first is least significant
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x7fffffffffffffffL, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // folds the addition carry x20 into the final borrow x32; the difference itself is discarded
+ { uint64_t x33 = cmovznz(x32, x28, x19); // presumably selects the unsubtracted limb (x19) when x32 is nonzero - cmovznz is defined outside this view, confirm
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35; // selected from the limbs computed first (x22 / x13)
+ out[1] = x34;
+ out[2] = x33; // selected from the limbs computed last (x28 / x19)
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e191m19/femul.c b/src/Specific/montgomery64_2e191m19/femul.c
index 2d3408128..2bb56c397 100644
--- a/src/Specific/montgomery64_2e191m19/femul.c
+++ b/src/Specific/montgomery64_2e191m19/femul.c
@@ -1,92 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x86bca1af286bca1bL, &_);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffedL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x7fffffffffffffffL, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
-{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
-{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
-{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
-{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x86bca1af286bca1bL, &_);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffedL, &x98);
-{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
-{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x7fffffffffffffffL, &x104);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
-{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
-{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
-{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
-{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
-{ uint8_t x126 = (x125 + x92);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
-{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
-{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x86bca1af286bca1bL, &_);
-{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffedL, &x162);
-{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
-{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x7fffffffffffffffL, &x168);
-{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
-{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
-{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
-{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
-{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
-{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
-{ uint8_t x190 = (x189 + x156);
-{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffedL, &x192);
-{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
-{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x7fffffffffffffffL, &x198);
-{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
-{ uint64_t x203 = cmovznz(x202, x198, x188);
-{ uint64_t x204 = cmovznz(x202, x195, x185);
-{ uint64_t x205 = cmovznz(x202, x192, x182);
-out[0] = x203;
-out[1] = x204;
-out[2] = x205;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Multiplies in1 by in2 one in1 limb at a time, folding in multiples of the constant limbs 0xffffffffffffffed, 0xffffffffffffffff, 0x7fffffffffffffff after each row; result goes to out[0..2]. Looks like word-by-word Montgomery multiplication, going by the directory name - confirm.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 1: in1[0] times each limb of in2; x13 is the low half of the product, x14 the high half
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x86bca1af286bca1bL, &_); // keeps only the low half of x13 * 0x86bca1af286bca1b; presumably the per-limb Montgomery factor - confirm
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffedL, &x35); // x31 times each of the three constant limbs
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x7fffffffffffffffL, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+ { uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+ { uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // the sum is discarded; only the carry x53 is used
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // row 2: in1[1] times each limb of in2
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+ { uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // add row 2 to the running limbs x55, x58, x61, x62
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+ { uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x86bca1af286bca1bL, &_); // same low-half multiply as for x31, now on x82
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffedL, &x98);
+ { uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+ { uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x7fffffffffffffffL, &x104);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+ { uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+ { uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // the sum is discarded; only the carry x116 is used
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+ { uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+ { uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+ { uint8_t x126 = (x125 + x92); // sum of the two top carry bytes from this round
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // row 3: in1[2] times each limb of in2
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+ { uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // add row 3 to the running limbs x118, x121, x124, x126
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+ { uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x86bca1af286bca1bL, &_); // same low-half multiply as for x31, now on x146
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffedL, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x7fffffffffffffffL, &x168);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+ { uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+ { uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // the sum is discarded; only the carry x180 is used
+ { uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+ { uint8_t x190 = (x189 + x156); // sum of the two top carry bytes from this round
+ { uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffedL, &x192); // trial subtraction of the three constant limbs (2^191 - 19 when the first is least significant) from x182, x185, x188
+ { uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+ { uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x7fffffffffffffffL, &x198);
+ { uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // folds x190 into the final borrow x202; the difference itself is discarded
+ { uint64_t x203 = cmovznz(x202, x198, x188); // presumably selects the unsubtracted limb (x188) when x202 is nonzero - cmovznz is defined outside this view, confirm
+ { uint64_t x204 = cmovznz(x202, x195, x185);
+ { uint64_t x205 = cmovznz(x202, x192, x182);
+ out[0] = x205; // selected from x192 / x182
+ out[1] = x204;
+ out[2] = x203; // selected from x198 / x188
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e191m19/fenz.c b/src/Specific/montgomery64_2e191m19/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e191m19/fenz.c
+++ b/src/Specific/montgomery64_2e191m19/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) { // Stores the bitwise OR of in1[0], in1[1] and in1[2] in out[0]. NOTE(review): `ReturnType` is not defined anywhere in view - confirm it is a macro supplied by the including file and not a code-generator artifact.
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6; // zero exactly when all three input limbs are zero
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e191m19/feopp.c b/src/Specific/montgomery64_2e191m19/feopp.c
index 9e4a88a93..603f84e85 100644
--- a/src/Specific/montgomery64_2e191m19/feopp.c
+++ b/src/Specific/montgomery64_2e191m19/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffedL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0x7fffffffffffffffL);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) { // Computes 0 - in1 over three 64-bit limbs (in1[0] enters the borrow chain first), then adds a masked three-limb constant and writes the sum to out[0..2].
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // 0 - x2; borrow out in x7
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13 is the final borrow of the three-limb subtraction
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; presumably all-ones when x13 is nonzero and 0 otherwise - cmovznz is defined outside this view, confirm
+ { uint64_t x15 = (x14 & 0xffffffffffffffedL); // constant limbs 0xffffffffffffffed, 0xffffffffffffffff, 0x7fffffffffffffff equal 2^191 - 19 when the first is least significant
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0x7fffffffffffffffL);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the last limb is discarded
+ out[0] = x17; // limb produced first by the add-back chain
+ out[1] = x21;
+ out[2] = x25; // limb produced last by the add-back chain
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e191m19/fesub.c b/src/Specific/montgomery64_2e191m19/fesub.c
index 20d305083..ec64f7e84 100644
--- a/src/Specific/montgomery64_2e191m19/fesub.c
+++ b/src/Specific/montgomery64_2e191m19/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffedL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0x7fffffffffffffffL);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Computes in1 - in2 over three 64-bit limbs (index 0 enters the borrow chain first), then adds a masked three-limb constant and writes the sum to out[0..2].
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // in1[0] - in2[0]; borrow out in x14
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20 is the final borrow of the three-limb subtraction
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; presumably all-ones when x20 is nonzero and 0 otherwise - cmovznz is defined outside this view, confirm
+ { uint64_t x22 = (x21 & 0xffffffffffffffedL); // constant limbs 0xffffffffffffffed, 0xffffffffffffffff, 0x7fffffffffffffff equal 2^191 - 19 when the first is least significant
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0x7fffffffffffffffL);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the last limb is discarded
+ out[0] = x24; // limb produced first by the add-back chain
+ out[1] = x28;
+ out[2] = x32; // limb produced last by the add-back chain
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e192m2e64m1/feadd.c b/src/Specific/montgomery64_2e192m2e64m1/feadd.c
index 66b2d1360..8f89a89db 100644
--- a/src/Specific/montgomery64_2e192m2e64m1/feadd.c
+++ b/src/Specific/montgomery64_2e192m2e64m1/feadd.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffffL, &x22);
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xfffffffffffffffeL, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xffffffffffffffffL, &x28);
-{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_);
-{ uint64_t x33 = cmovznz(x32, x28, x19);
-{ uint64_t x34 = cmovznz(x32, x25, x16);
-{ uint64_t x35 = cmovznz(x32, x22, x13);
-out[0] = x33;
-out[1] = x34;
-out[2] = x35;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feadd(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Computes in1 + in2 over three 64-bit limbs, also computes that sum minus a fixed three-limb constant, and writes one of the two to out[0..2] depending on the final borrow.
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // in1[0] + in2[0]; carry out in x14
+ { uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 is the carry out of the three-limb sum
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffffL, &x22); // constant limbs 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff equal 2^192 - 2^64 - 1 when the first is least significant
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xfffffffffffffffeL, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xffffffffffffffffL, &x28);
+ { uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // folds the addition carry x20 into the final borrow x32; the difference itself is discarded
+ { uint64_t x33 = cmovznz(x32, x28, x19); // presumably selects the unsubtracted limb (x19) when x32 is nonzero - cmovznz is defined outside this view, confirm
+ { uint64_t x34 = cmovznz(x32, x25, x16);
+ { uint64_t x35 = cmovznz(x32, x22, x13);
+ out[0] = x35; // selected from the limbs computed first (x22 / x13)
+ out[1] = x34;
+ out[2] = x33; // selected from the limbs computed last (x28 / x19)
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e192m2e64m1/femul.c b/src/Specific/montgomery64_2e192m2e64m1/femul.c
index b6a9f8dce..08168931a 100644
--- a/src/Specific/montgomery64_2e192m2e64m1/femul.c
+++ b/src/Specific/montgomery64_2e192m2e64m1/femul.c
@@ -1,89 +1,77 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
-{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
-{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
-{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x13, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x13, 0xfffffffffffffffeL, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x13, 0xffffffffffffffffL, &x38);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(0x0, x32, x34, &x40);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x35, x37, &x43);
-{ uint64_t x46; uint8_t _ = _addcarryx_u64(0x0, x44, x38, &x46);
-{ uint64_t _; uint8_t x50 = _addcarryx_u64(0x0, x13, x31, &_);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x22, x40, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x25, x43, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x28, x46, &x58);
-{ uint64_t x62; uint64_t x61 = _mulx_u64(x7, x9, &x62);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x11, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x10, &x68);
-{ uint64_t x70; uint8_t x71 = _addcarryx_u64(0x0, x62, x64, &x70);
-{ uint64_t x73; uint8_t x74 = _addcarryx_u64(x71, x65, x67, &x73);
-{ uint64_t x76; uint8_t _ = _addcarryx_u64(0x0, x74, x68, &x76);
-{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x52, x61, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x55, x70, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x59, x76, &x88);
-{ uint64_t x92; uint64_t x91 = _mulx_u64(x79, 0xffffffffffffffffL, &x92);
-{ uint64_t x95; uint64_t x94 = _mulx_u64(x79, 0xfffffffffffffffeL, &x95);
-{ uint64_t x98; uint64_t x97 = _mulx_u64(x79, 0xffffffffffffffffL, &x98);
-{ uint64_t x100; uint8_t x101 = _addcarryx_u64(0x0, x92, x94, &x100);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x95, x97, &x103);
-{ uint64_t x106; uint8_t _ = _addcarryx_u64(0x0, x104, x98, &x106);
-{ uint64_t _; uint8_t x110 = _addcarryx_u64(0x0, x79, x91, &_);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x82, x100, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x85, x103, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x88, x106, &x118);
-{ uint8_t x120 = (x119 + x89);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x6, x9, &x123);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x6, x11, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x10, &x129);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x123, x125, &x131);
-{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x126, x128, &x134);
-{ uint64_t x137; uint8_t _ = _addcarryx_u64(0x0, x135, x129, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(0x0, x112, x122, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x115, x131, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x118, x134, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x120, x137, &x149);
-{ uint64_t x153; uint64_t x152 = _mulx_u64(x140, 0xffffffffffffffffL, &x153);
-{ uint64_t x156; uint64_t x155 = _mulx_u64(x140, 0xfffffffffffffffeL, &x156);
-{ uint64_t x159; uint64_t x158 = _mulx_u64(x140, 0xffffffffffffffffL, &x159);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(0x0, x153, x155, &x161);
-{ uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x156, x158, &x164);
-{ uint64_t x167; uint8_t _ = _addcarryx_u64(0x0, x165, x159, &x167);
-{ uint64_t _; uint8_t x171 = _addcarryx_u64(0x0, x140, x152, &_);
-{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x143, x161, &x173);
-{ uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x146, x164, &x176);
-{ uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x149, x167, &x179);
-{ uint8_t x181 = (x180 + x150);
-{ uint64_t x183; uint8_t x184 = _subborrow_u64(0x0, x173, 0xffffffffffffffffL, &x183);
-{ uint64_t x186; uint8_t x187 = _subborrow_u64(x184, x176, 0xfffffffffffffffeL, &x186);
-{ uint64_t x189; uint8_t x190 = _subborrow_u64(x187, x179, 0xffffffffffffffffL, &x189);
-{ uint64_t _; uint8_t x193 = _subborrow_u64(x190, x181, 0x0, &_);
-{ uint64_t x194 = cmovznz(x193, x189, x179);
-{ uint64_t x195 = cmovznz(x193, x186, x176);
-{ uint64_t x196 = cmovznz(x193, x183, x173);
-out[0] = x194;
-out[1] = x195;
-out[2] = x196;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14);
+ { uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+ { uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+ { uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x13, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x13, 0xfffffffffffffffeL, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x13, 0xffffffffffffffffL, &x38);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(0x0, x32, x34, &x40);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x35, x37, &x43);
+ { uint64_t x46; uint8_t _ = _addcarryx_u64(0x0, x44, x38, &x46);
+ { uint64_t _; uint8_t x50 = _addcarryx_u64(0x0, x13, x31, &_);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x22, x40, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x25, x43, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x28, x46, &x58);
+ { uint64_t x62; uint64_t x61 = _mulx_u64(x7, x9, &x62);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x7, x11, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x7, x10, &x68);
+ { uint64_t x70; uint8_t x71 = _addcarryx_u64(0x0, x62, x64, &x70);
+ { uint64_t x73; uint8_t x74 = _addcarryx_u64(x71, x65, x67, &x73);
+ { uint64_t x76; uint8_t _ = _addcarryx_u64(0x0, x74, x68, &x76);
+ { uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x52, x61, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x55, x70, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x59, x76, &x88);
+ { uint64_t x92; uint64_t x91 = _mulx_u64(x79, 0xffffffffffffffffL, &x92);
+ { uint64_t x95; uint64_t x94 = _mulx_u64(x79, 0xfffffffffffffffeL, &x95);
+ { uint64_t x98; uint64_t x97 = _mulx_u64(x79, 0xffffffffffffffffL, &x98);
+ { uint64_t x100; uint8_t x101 = _addcarryx_u64(0x0, x92, x94, &x100);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x95, x97, &x103);
+ { uint64_t x106; uint8_t _ = _addcarryx_u64(0x0, x104, x98, &x106);
+ { uint64_t _; uint8_t x110 = _addcarryx_u64(0x0, x79, x91, &_);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x82, x100, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x85, x103, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x88, x106, &x118);
+ { uint8_t x120 = (x119 + x89);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x6, x9, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x6, x11, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x6, x10, &x129);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x123, x125, &x131);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x126, x128, &x134);
+ { uint64_t x137; uint8_t _ = _addcarryx_u64(0x0, x135, x129, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(0x0, x112, x122, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x115, x131, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x118, x134, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x120, x137, &x149);
+ { uint64_t x153; uint64_t x152 = _mulx_u64(x140, 0xffffffffffffffffL, &x153);
+ { uint64_t x156; uint64_t x155 = _mulx_u64(x140, 0xfffffffffffffffeL, &x156);
+ { uint64_t x159; uint64_t x158 = _mulx_u64(x140, 0xffffffffffffffffL, &x159);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(0x0, x153, x155, &x161);
+ { uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x156, x158, &x164);
+ { uint64_t x167; uint8_t _ = _addcarryx_u64(0x0, x165, x159, &x167);
+ { uint64_t _; uint8_t x171 = _addcarryx_u64(0x0, x140, x152, &_);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x143, x161, &x173);
+ { uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x146, x164, &x176);
+ { uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x149, x167, &x179);
+ { uint8_t x181 = (x180 + x150);
+ { uint64_t x183; uint8_t x184 = _subborrow_u64(0x0, x173, 0xffffffffffffffffL, &x183);
+ { uint64_t x186; uint8_t x187 = _subborrow_u64(x184, x176, 0xfffffffffffffffeL, &x186);
+ { uint64_t x189; uint8_t x190 = _subborrow_u64(x187, x179, 0xffffffffffffffffL, &x189);
+ { uint64_t _; uint8_t x193 = _subborrow_u64(x190, x181, 0x0, &_);
+ { uint64_t x194 = cmovznz(x193, x189, x179);
+ { uint64_t x195 = cmovznz(x193, x186, x176);
+ { uint64_t x196 = cmovznz(x193, x183, x173);
+ out[0] = x196;
+ out[1] = x195;
+ out[2] = x194;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e192m2e64m1/fenz.c b/src/Specific/montgomery64_2e192m2e64m1/fenz.c
index e29935ffc..2632a2866 100644
--- a/src/Specific/montgomery64_2e192m2e64m1/fenz.c
+++ b/src/Specific/montgomery64_2e192m2e64m1/fenz.c
@@ -1,24 +1,9 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x5 = (x4 | x3);
-{ uint64_t x6 = (x2 | x5);
-out[0] = x6;
-}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x5 = (x4 | x3);
+ { uint64_t x6 = (x2 | x5);
+ out[0] = x6;
+ }}}}}
+}
diff --git a/src/Specific/montgomery64_2e192m2e64m1/feopp.c b/src/Specific/montgomery64_2e192m2e64m1/feopp.c
index 1c0de56c9..ff340f966 100644
--- a/src/Specific/montgomery64_2e192m2e64m1/feopp.c
+++ b/src/Specific/montgomery64_2e192m2e64m1/feopp.c
@@ -1,34 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
-{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
-{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
-{ uint64_t x15 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
-{ uint64_t x19 = (x14 & 0xfffffffffffffffeL);
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
-{ uint64_t x23 = (x14 & 0xffffffffffffffffL);
-{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
-out[0] = x25;
-out[1] = x21;
-out[2] = x17;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void feopp(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6);
+ { uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+ { uint64_t x19 = (x14 & 0xfffffffffffffffeL);
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+ { uint64_t x23 = (x14 & 0xffffffffffffffffL);
+ { uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e192m2e64m1/fesub.c b/src/Specific/montgomery64_2e192m2e64m1/fesub.c
index 0b3c8b1c0..a5b851fbc 100644
--- a/src/Specific/montgomery64_2e192m2e64m1/fesub.c
+++ b/src/Specific/montgomery64_2e192m2e64m1/fesub.c
@@ -1,34 +1,22 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
-{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
-{ uint64_t x22 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
-{ uint64_t x26 = (x21 & 0xfffffffffffffffeL);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
-{ uint64_t x30 = (x21 & 0xffffffffffffffffL);
-{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
-out[0] = x32;
-out[1] = x28;
-out[2] = x24;
-}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesub(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19);
+ { uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL);
+ { uint64_t x22 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+ { uint64_t x26 = (x21 & 0xfffffffffffffffeL);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+ { uint64_t x30 = (x21 & 0xffffffffffffffffL);
+ { uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32);
+ out[0] = x24;
+ out[1] = x28;
+ out[2] = x32;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e194m33/feadd.c b/src/Specific/montgomery64_2e194m33/feadd.c
index 077a1af52..5640ec8b8 100644
--- a/src/Specific/montgomery64_2e194m33/feadd.c
+++ b/src/Specific/montgomery64_2e194m33/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffdfL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffdfL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e194m33/femul.c b/src/Specific/montgomery64_2e194m33/femul.c
index 774ec81d8..aecd68cb1 100644
--- a/src/Specific/montgomery64_2e194m33/femul.c
+++ b/src/Specific/montgomery64_2e194m33/femul.c
@@ -1,36 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xf83e0f83e0f83e1, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffdfL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-out[0] = uint64_t x53;
-out[1] = uint8_t x54 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x41;
-out[2] = 0x3;;
-}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xf83e0f83e0f83e1, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffdfL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, 0x3);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint8_t x64 = (x63 + x54);
+ { uint64_t _; uint8_t x67 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(x67, x29, x56, &x69);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x32, x59, &x72);
+ { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x35, x62, &x75);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x38, x64, &x78);
+ { uint64_t x82; uint64_t x81 = _mulx_u64(x7, x11, &x82);
+ { uint64_t x85; uint64_t x84 = _mulx_u64(x7, x13, &x85);
+ { uint64_t x88; uint64_t x87 = _mulx_u64(x7, x15, &x88);
+ { uint64_t x91; uint64_t x90 = _mulx_u64(x7, x14, &x91);
+ { uint64_t x93; uint8_t x94 = _addcarryx_u64(0x0, x82, x84, &x93);
+ { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x85, x87, &x96);
+ { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x88, x90, &x99);
+ { uint64_t x102; uint8_t _ = _addcarryx_u64(0x0, x100, x91, &x102);
+ { uint64_t x105; uint8_t x106 = _addcarryx_u64(0x0, x69, x81, &x105);
+ { uint64_t x108; uint8_t x109 = _addcarryx_u64(x106, x72, x93, &x108);
+ { uint64_t x111; uint8_t x112 = _addcarryx_u64(x109, x75, x96, &x111);
+ { uint64_t x114; uint8_t x115 = _addcarryx_u64(x112, x78, x99, &x114);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(x115, x79, x102, &x117);
+ { uint64_t _; uint64_t x120 = _mulx_u64(x105, 0xf83e0f83e0f83e1, &_);
+ { uint64_t x124; uint64_t x123 = _mulx_u64(x120, 0xffffffffffffffdfL, &x124);
+ { uint64_t x127; uint64_t x126 = _mulx_u64(x120, 0xffffffffffffffffL, &x127);
+ { uint64_t x130; uint64_t x129 = _mulx_u64(x120, 0xffffffffffffffffL, &x130);
+ { uint64_t x132, uint8_t x133 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x120, 0x3);
+ { uint64_t x135; uint8_t x136 = _addcarryx_u64(0x0, x124, x126, &x135);
+ { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x127, x129, &x138);
+ { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x130, x132, &x141);
+ { uint8_t x143 = (x142 + x133);
+ { uint64_t _; uint8_t x146 = _addcarryx_u64(0x0, x105, x123, &_);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x108, x135, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x111, x138, &x151);
+ { uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x114, x141, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x117, x143, &x157);
+ { uint8_t x159 = (x158 + x118);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x9, x11, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x9, x13, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x9, x15, &x168);
+ { uint64_t x171; uint64_t x170 = _mulx_u64(x9, x14, &x171);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(0x0, x162, x164, &x173);
+ { uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x165, x167, &x176);
+ { uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x168, x170, &x179);
+ { uint64_t x182; uint8_t _ = _addcarryx_u64(0x0, x180, x171, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(0x0, x148, x161, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x151, x173, &x188);
+ { uint64_t x191; uint8_t x192 = _addcarryx_u64(x189, x154, x176, &x191);
+ { uint64_t x194; uint8_t x195 = _addcarryx_u64(x192, x157, x179, &x194);
+ { uint64_t x197; uint8_t x198 = _addcarryx_u64(x195, x159, x182, &x197);
+ { uint64_t _; uint64_t x200 = _mulx_u64(x185, 0xf83e0f83e0f83e1, &_);
+ { uint64_t x204; uint64_t x203 = _mulx_u64(x200, 0xffffffffffffffdfL, &x204);
+ { uint64_t x207; uint64_t x206 = _mulx_u64(x200, 0xffffffffffffffffL, &x207);
+ { uint64_t x210; uint64_t x209 = _mulx_u64(x200, 0xffffffffffffffffL, &x210);
+ { uint64_t x212, uint8_t x213 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x200, 0x3);
+ { uint64_t x215; uint8_t x216 = _addcarryx_u64(0x0, x204, x206, &x215);
+ { uint64_t x218; uint8_t x219 = _addcarryx_u64(x216, x207, x209, &x218);
+ { uint64_t x221; uint8_t x222 = _addcarryx_u64(x219, x210, x212, &x221);
+ { uint8_t x223 = (x222 + x213);
+ { uint64_t _; uint8_t x226 = _addcarryx_u64(0x0, x185, x203, &_);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x188, x215, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x191, x218, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x194, x221, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x197, x223, &x237);
+ { uint8_t x239 = (x238 + x198);
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x11, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x13, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x15, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x14, &x251);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x248, x250, &x259);
+ { uint64_t x262; uint8_t _ = _addcarryx_u64(0x0, x260, x251, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(0x0, x228, x241, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x239, x262, &x277);
+ { uint64_t _; uint64_t x280 = _mulx_u64(x265, 0xf83e0f83e0f83e1, &_);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x280, 0xffffffffffffffdfL, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x280, 0xffffffffffffffffL, &x287);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x280, 0xffffffffffffffffL, &x290);
+ { uint64_t x292, uint8_t x293 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x280, 0x3);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(0x0, x284, x286, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x287, x289, &x298);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x290, x292, &x301);
+ { uint8_t x303 = (x302 + x293);
+ { uint64_t _; uint8_t x306 = _addcarryx_u64(0x0, x265, x283, &_);
+ { uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x268, x295, &x308);
+ { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x271, x298, &x311);
+ { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x274, x301, &x314);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x277, x303, &x317);
+ { uint8_t x319 = (x318 + x278);
+ { uint64_t x321; uint8_t x322 = _subborrow_u64(0x0, x308, 0xffffffffffffffdfL, &x321);
+ { uint64_t x324; uint8_t x325 = _subborrow_u64(x322, x311, 0xffffffffffffffffL, &x324);
+ { uint64_t x327; uint8_t x328 = _subborrow_u64(x325, x314, 0xffffffffffffffffL, &x327);
+ { uint64_t x330; uint8_t x331 = _subborrow_u64(x328, x317, 0x3, &x330);
+ { uint64_t _; uint8_t x334 = _subborrow_u64(x331, x319, 0x0, &_);
+ { uint64_t x335 = cmovznz(x334, x330, x317);
+ { uint64_t x336 = cmovznz(x334, x327, x314);
+ { uint64_t x337 = cmovznz(x334, x324, x311);
+ { uint64_t x338 = cmovznz(x334, x321, x308);
+ out[0] = x338;
+ out[1] = x337;
+ out[2] = x336;
+ out[3] = x335;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e194m33/fenz.c b/src/Specific/montgomery64_2e194m33/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e194m33/fenz.c
+++ b/src/Specific/montgomery64_2e194m33/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e194m33/feopp.c b/src/Specific/montgomery64_2e194m33/feopp.c
index 0301c52c9..7936f63b6 100644
--- a/src/Specific/montgomery64_2e194m33/feopp.c
+++ b/src/Specific/montgomery64_2e194m33/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffdfL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint8_t x32 = ((uint8_t)x19 & 0x3);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffdfL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint8_t x32 = ((uint8_t)x19 & 0x3);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e194m33/fesub.c b/src/Specific/montgomery64_2e194m33/fesub.c
index 9c5537644..ce527ea74 100644
--- a/src/Specific/montgomery64_2e194m33/fesub.c
+++ b/src/Specific/montgomery64_2e194m33/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffdfL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint8_t x41 = ((uint8_t)x28 & 0x3);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffffdfL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint8_t x41 = ((uint8_t)x28 & 0x3);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e196m15/feadd.c b/src/Specific/montgomery64_2e196m15/feadd.c
index c70f970dc..07a03ff20 100644
--- a/src/Specific/montgomery64_2e196m15/feadd.c
+++ b/src/Specific/montgomery64_2e196m15/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff1L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xf, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff1L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xf, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e196m15/femul.c b/src/Specific/montgomery64_2e196m15/femul.c
index 61ba1a7e2..0febcb70c 100644
--- a/src/Specific/montgomery64_2e196m15/femul.c
+++ b/src/Specific/montgomery64_2e196m15/femul.c
@@ -1,36 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff1L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-out[0] = uint64_t x53;
-out[1] = uint8_t x54 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x41;
-out[2] = 0xf;;
-}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff1L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, 0xf);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint8_t x64 = (x63 + x54);
+ { uint64_t _; uint8_t x67 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(x67, x29, x56, &x69);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x32, x59, &x72);
+ { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x35, x62, &x75);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x38, x64, &x78);
+ { uint64_t x82; uint64_t x81 = _mulx_u64(x7, x11, &x82);
+ { uint64_t x85; uint64_t x84 = _mulx_u64(x7, x13, &x85);
+ { uint64_t x88; uint64_t x87 = _mulx_u64(x7, x15, &x88);
+ { uint64_t x91; uint64_t x90 = _mulx_u64(x7, x14, &x91);
+ { uint64_t x93; uint8_t x94 = _addcarryx_u64(0x0, x82, x84, &x93);
+ { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x85, x87, &x96);
+ { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x88, x90, &x99);
+ { uint64_t x102; uint8_t _ = _addcarryx_u64(0x0, x100, x91, &x102);
+ { uint64_t x105; uint8_t x106 = _addcarryx_u64(0x0, x69, x81, &x105);
+ { uint64_t x108; uint8_t x109 = _addcarryx_u64(x106, x72, x93, &x108);
+ { uint64_t x111; uint8_t x112 = _addcarryx_u64(x109, x75, x96, &x111);
+ { uint64_t x114; uint8_t x115 = _addcarryx_u64(x112, x78, x99, &x114);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(x115, x79, x102, &x117);
+ { uint64_t _; uint64_t x120 = _mulx_u64(x105, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x124; uint64_t x123 = _mulx_u64(x120, 0xfffffffffffffff1L, &x124);
+ { uint64_t x127; uint64_t x126 = _mulx_u64(x120, 0xffffffffffffffffL, &x127);
+ { uint64_t x130; uint64_t x129 = _mulx_u64(x120, 0xffffffffffffffffL, &x130);
+ { uint64_t x132, uint8_t x133 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x120, 0xf);
+ { uint64_t x135; uint8_t x136 = _addcarryx_u64(0x0, x124, x126, &x135);
+ { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x127, x129, &x138);
+ { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x130, x132, &x141);
+ { uint8_t x143 = (x142 + x133);
+ { uint64_t _; uint8_t x146 = _addcarryx_u64(0x0, x105, x123, &_);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x108, x135, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x111, x138, &x151);
+ { uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x114, x141, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x117, x143, &x157);
+ { uint8_t x159 = (x158 + x118);
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x9, x11, &x162);
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x9, x13, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x9, x15, &x168);
+ { uint64_t x171; uint64_t x170 = _mulx_u64(x9, x14, &x171);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(0x0, x162, x164, &x173);
+ { uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x165, x167, &x176);
+ { uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x168, x170, &x179);
+ { uint64_t x182; uint8_t _ = _addcarryx_u64(0x0, x180, x171, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(0x0, x148, x161, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x151, x173, &x188);
+ { uint64_t x191; uint8_t x192 = _addcarryx_u64(x189, x154, x176, &x191);
+ { uint64_t x194; uint8_t x195 = _addcarryx_u64(x192, x157, x179, &x194);
+ { uint64_t x197; uint8_t x198 = _addcarryx_u64(x195, x159, x182, &x197);
+ { uint64_t _; uint64_t x200 = _mulx_u64(x185, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x204; uint64_t x203 = _mulx_u64(x200, 0xfffffffffffffff1L, &x204);
+ { uint64_t x207; uint64_t x206 = _mulx_u64(x200, 0xffffffffffffffffL, &x207);
+ { uint64_t x210; uint64_t x209 = _mulx_u64(x200, 0xffffffffffffffffL, &x210);
+ { uint64_t x212, uint8_t x213 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x200, 0xf);
+ { uint64_t x215; uint8_t x216 = _addcarryx_u64(0x0, x204, x206, &x215);
+ { uint64_t x218; uint8_t x219 = _addcarryx_u64(x216, x207, x209, &x218);
+ { uint64_t x221; uint8_t x222 = _addcarryx_u64(x219, x210, x212, &x221);
+ { uint8_t x223 = (x222 + x213);
+ { uint64_t _; uint8_t x226 = _addcarryx_u64(0x0, x185, x203, &_);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x188, x215, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x191, x218, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x194, x221, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x197, x223, &x237);
+ { uint8_t x239 = (x238 + x198);
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x11, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x13, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x15, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x14, &x251);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x248, x250, &x259);
+ { uint64_t x262; uint8_t _ = _addcarryx_u64(0x0, x260, x251, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(0x0, x228, x241, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x239, x262, &x277);
+ { uint64_t _; uint64_t x280 = _mulx_u64(x265, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x280, 0xfffffffffffffff1L, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x280, 0xffffffffffffffffL, &x287);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x280, 0xffffffffffffffffL, &x290);
+ { uint64_t x292, uint8_t x293 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x280, 0xf);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(0x0, x284, x286, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x287, x289, &x298);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x290, x292, &x301);
+ { uint8_t x303 = (x302 + x293);
+ { uint64_t _; uint8_t x306 = _addcarryx_u64(0x0, x265, x283, &_);
+ { uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x268, x295, &x308);
+ { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x271, x298, &x311);
+ { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x274, x301, &x314);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x277, x303, &x317);
+ { uint8_t x319 = (x318 + x278);
+ { uint64_t x321; uint8_t x322 = _subborrow_u64(0x0, x308, 0xfffffffffffffff1L, &x321);
+ { uint64_t x324; uint8_t x325 = _subborrow_u64(x322, x311, 0xffffffffffffffffL, &x324);
+ { uint64_t x327; uint8_t x328 = _subborrow_u64(x325, x314, 0xffffffffffffffffL, &x327);
+ { uint64_t x330; uint8_t x331 = _subborrow_u64(x328, x317, 0xf, &x330);
+ { uint64_t _; uint8_t x334 = _subborrow_u64(x331, x319, 0x0, &_);
+ { uint64_t x335 = cmovznz(x334, x330, x317);
+ { uint64_t x336 = cmovznz(x334, x327, x314);
+ { uint64_t x337 = cmovznz(x334, x324, x311);
+ { uint64_t x338 = cmovznz(x334, x321, x308);
+ out[0] = x338;
+ out[1] = x337;
+ out[2] = x336;
+ out[3] = x335;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e196m15/fenz.c b/src/Specific/montgomery64_2e196m15/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e196m15/fenz.c
+++ b/src/Specific/montgomery64_2e196m15/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e196m15/feopp.c b/src/Specific/montgomery64_2e196m15/feopp.c
index c78613e0e..b8d1c31ca 100644
--- a/src/Specific/montgomery64_2e196m15/feopp.c
+++ b/src/Specific/montgomery64_2e196m15/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffff1L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint8_t x32 = ((uint8_t)x19 & 0xf);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffff1L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint8_t x32 = ((uint8_t)x19 & 0xf);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e196m15/fesub.c b/src/Specific/montgomery64_2e196m15/fesub.c
index 982a6ed70..c98c307bd 100644
--- a/src/Specific/montgomery64_2e196m15/fesub.c
+++ b/src/Specific/montgomery64_2e196m15/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffff1L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint8_t x41 = ((uint8_t)x28 & 0xf);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xfffffffffffffff1L);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint8_t x41 = ((uint8_t)x28 & 0xf);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e198m17/feadd.c b/src/Specific/montgomery64_2e198m17/feadd.c
index 7422664b9..c980a1aae 100644
--- a/src/Specific/montgomery64_2e198m17/feadd.c
+++ b/src/Specific/montgomery64_2e198m17/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffefL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3f, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffefL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3f, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e198m17/femul.c b/src/Specific/montgomery64_2e198m17/femul.c
index 0e4d843b8..c669fcb0f 100644
--- a/src/Specific/montgomery64_2e198m17/femul.c
+++ b/src/Specific/montgomery64_2e198m17/femul.c
@@ -1,36 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffefL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-out[0] = uint64_t x53;
-out[1] = uint8_t x54 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x41;
-out[2] = 0x3f;;
-}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // four rounds, one per limb of in1 (x5, x7, x9, x8 = in1[0..3]): multiply that limb by all of in2, add into the running words, then add a multiple of the constant (0x..ef, ff..ff, ff..ff, 0x3f) = 2^198 - 17 and drop the low word; ends with one conditional subtraction of that constant. The shape matches word-by-word Montgomery multiplication, as the directory name montgomery64_2e198m17 suggests (TODO confirm against the generator). NOTE(review): the four `Op (Syntax.MulSplit ...)` lines below are not valid C, so this function cannot compile as printed
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: in1[0] times each limb of in2; the returned value (x17) is used as the low half and the pointer result (x18) as the high half
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // each high half is added to the next low half, giving the product words x17, x29, x32, x35, x38
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // last carry x36 is added to the top high half; the carry out of this add is discarded
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xf0f0f0f0f0f0f0f1L, &_); // x41 = low 64 bits of x17 * 0xf0f0f0f0f0f0f0f1 (high half discarded); that constant times 17 is 1 mod 2^64, and limb 0 of the constant 2^198 - 17 is 2^64 - 17
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffefL, &x45); // x41 times the three low limbs of the constant; the top limb 0x3f is handled by the MulSplit line below
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, 0x3f); // NOTE(review): not valid C - unexpanded generator output; from how x53 and x54 are consumed below it presumably stands for the low (x53) and high (x54) parts of x41 * 0x3f (TODO confirm)
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint8_t x64 = (x63 + x54); // top word of x41 * constant: last carry plus the 8-bit high part
+ { uint64_t _; uint8_t x67 = _addcarryx_u64(0x0, x17, x44, &_); // add x41 * constant to the product; the low sum word is discarded and only its carry x67 continues
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(x67, x29, x56, &x69);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x32, x59, &x72);
+ { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x35, x62, &x75);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x38, x64, &x78); // running words after round 1: x69, x72, x75, x78 plus carry x79
+ { uint64_t x82; uint64_t x81 = _mulx_u64(x7, x11, &x82); // round 2: in1[1] times each limb of in2
+ { uint64_t x85; uint64_t x84 = _mulx_u64(x7, x13, &x85);
+ { uint64_t x88; uint64_t x87 = _mulx_u64(x7, x15, &x88);
+ { uint64_t x91; uint64_t x90 = _mulx_u64(x7, x14, &x91);
+ { uint64_t x93; uint8_t x94 = _addcarryx_u64(0x0, x82, x84, &x93);
+ { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x85, x87, &x96);
+ { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x88, x90, &x99);
+ { uint64_t x102; uint8_t _ = _addcarryx_u64(0x0, x100, x91, &x102);
+ { uint64_t x105; uint8_t x106 = _addcarryx_u64(0x0, x69, x81, &x105); // add the round-2 product into the running words
+ { uint64_t x108; uint8_t x109 = _addcarryx_u64(x106, x72, x93, &x108);
+ { uint64_t x111; uint8_t x112 = _addcarryx_u64(x109, x75, x96, &x111);
+ { uint64_t x114; uint8_t x115 = _addcarryx_u64(x112, x78, x99, &x114);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(x115, x79, x102, &x117);
+ { uint64_t _; uint64_t x120 = _mulx_u64(x105, 0xf0f0f0f0f0f0f0f1L, &_); // same factor computation as x41, now from the low running word x105
+ { uint64_t x124; uint64_t x123 = _mulx_u64(x120, 0xffffffffffffffefL, &x124);
+ { uint64_t x127; uint64_t x126 = _mulx_u64(x120, 0xffffffffffffffffL, &x127);
+ { uint64_t x130; uint64_t x129 = _mulx_u64(x120, 0xffffffffffffffffL, &x130);
+ { uint64_t x132, uint8_t x133 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x120, 0x3f); // NOTE(review): not valid C - same unexpanded MulSplit placeholder, here for x120 and 0x3f
+ { uint64_t x135; uint8_t x136 = _addcarryx_u64(0x0, x124, x126, &x135);
+ { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x127, x129, &x138);
+ { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x130, x132, &x141);
+ { uint8_t x143 = (x142 + x133);
+ { uint64_t _; uint8_t x146 = _addcarryx_u64(0x0, x105, x123, &_);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x108, x135, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x111, x138, &x151);
+ { uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x114, x141, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x117, x143, &x157);
+ { uint8_t x159 = (x158 + x118); // combines the carries out of the two top-limb additions of this round
+ { uint64_t x162; uint64_t x161 = _mulx_u64(x9, x11, &x162); // round 3: in1[2] times each limb of in2
+ { uint64_t x165; uint64_t x164 = _mulx_u64(x9, x13, &x165);
+ { uint64_t x168; uint64_t x167 = _mulx_u64(x9, x15, &x168);
+ { uint64_t x171; uint64_t x170 = _mulx_u64(x9, x14, &x171);
+ { uint64_t x173; uint8_t x174 = _addcarryx_u64(0x0, x162, x164, &x173);
+ { uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x165, x167, &x176);
+ { uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x168, x170, &x179);
+ { uint64_t x182; uint8_t _ = _addcarryx_u64(0x0, x180, x171, &x182);
+ { uint64_t x185; uint8_t x186 = _addcarryx_u64(0x0, x148, x161, &x185);
+ { uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x151, x173, &x188);
+ { uint64_t x191; uint8_t x192 = _addcarryx_u64(x189, x154, x176, &x191);
+ { uint64_t x194; uint8_t x195 = _addcarryx_u64(x192, x157, x179, &x194);
+ { uint64_t x197; uint8_t x198 = _addcarryx_u64(x195, x159, x182, &x197);
+ { uint64_t _; uint64_t x200 = _mulx_u64(x185, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x204; uint64_t x203 = _mulx_u64(x200, 0xffffffffffffffefL, &x204);
+ { uint64_t x207; uint64_t x206 = _mulx_u64(x200, 0xffffffffffffffffL, &x207);
+ { uint64_t x210; uint64_t x209 = _mulx_u64(x200, 0xffffffffffffffffL, &x210);
+ { uint64_t x212, uint8_t x213 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x200, 0x3f); // NOTE(review): not valid C - same unexpanded MulSplit placeholder, here for x200 and 0x3f
+ { uint64_t x215; uint8_t x216 = _addcarryx_u64(0x0, x204, x206, &x215);
+ { uint64_t x218; uint8_t x219 = _addcarryx_u64(x216, x207, x209, &x218);
+ { uint64_t x221; uint8_t x222 = _addcarryx_u64(x219, x210, x212, &x221);
+ { uint8_t x223 = (x222 + x213);
+ { uint64_t _; uint8_t x226 = _addcarryx_u64(0x0, x185, x203, &_);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x188, x215, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x191, x218, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x194, x221, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x197, x223, &x237);
+ { uint8_t x239 = (x238 + x198);
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x11, &x242); // round 4: in1[3] times each limb of in2
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x13, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x15, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x14, &x251);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x248, x250, &x259);
+ { uint64_t x262; uint8_t _ = _addcarryx_u64(0x0, x260, x251, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(0x0, x228, x241, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x239, x262, &x277);
+ { uint64_t _; uint64_t x280 = _mulx_u64(x265, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x280, 0xffffffffffffffefL, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x280, 0xffffffffffffffffL, &x287);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x280, 0xffffffffffffffffL, &x290);
+ { uint64_t x292, uint8_t x293 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x280, 0x3f); // NOTE(review): not valid C - same unexpanded MulSplit placeholder, here for x280 and 0x3f
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(0x0, x284, x286, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x287, x289, &x298);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x290, x292, &x301);
+ { uint8_t x303 = (x302 + x293);
+ { uint64_t _; uint8_t x306 = _addcarryx_u64(0x0, x265, x283, &_);
+ { uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x268, x295, &x308);
+ { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x271, x298, &x311);
+ { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x274, x301, &x314);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x277, x303, &x317);
+ { uint8_t x319 = (x318 + x278); // top carry word after the last round
+ { uint64_t x321; uint8_t x322 = _subborrow_u64(0x0, x308, 0xffffffffffffffefL, &x321); // trial subtraction of the constant 2^198 - 17 from the final words x308, x311, x314, x317
+ { uint64_t x324; uint8_t x325 = _subborrow_u64(x322, x311, 0xffffffffffffffffL, &x324);
+ { uint64_t x327; uint8_t x328 = _subborrow_u64(x325, x314, 0xffffffffffffffffL, &x327);
+ { uint64_t x330; uint8_t x331 = _subborrow_u64(x328, x317, 0x3f, &x330);
+ { uint64_t _; uint8_t x334 = _subborrow_u64(x331, x319, 0x0, &_); // extends the borrow chain over the top carry x319; only the final borrow x334 is used
+ { uint64_t x335 = cmovznz(x334, x330, x317); // per limb, x334 chooses between the subtracted word and the unsubtracted word; cmovznz is defined outside this hunk - presumably it yields the third argument when x334 is nonzero (TODO confirm)
+ { uint64_t x336 = cmovznz(x334, x327, x314);
+ { uint64_t x337 = cmovznz(x334, x324, x311);
+ { uint64_t x338 = cmovznz(x334, x321, x308);
+ out[0] = x338; // results are stored least-significant limb first
+ out[1] = x337;
+ out[2] = x336;
+ out[3] = x335;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e198m17/fenz.c b/src/Specific/montgomery64_2e198m17/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e198m17/fenz.c
+++ b/src/Specific/montgomery64_2e198m17/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) { // out[0] = bitwise OR of the four limbs of in1, so it is nonzero exactly when at least one limb is nonzero. NOTE(review): `ReturnType` is not declared anywhere in this hunk and looks like leftover generator output rather than a C type - confirm and drop it in the generator
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); // fold the limbs together from the top down: in1[2] | in1[3]
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8); // x9 now holds the OR of all four limbs
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e198m17/feopp.c b/src/Specific/montgomery64_2e198m17/feopp.c
index 67cf72890..2baa88e6a 100644
--- a/src/Specific/montgomery64_2e198m17/feopp.c
+++ b/src/Specific/montgomery64_2e198m17/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffefL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint8_t x32 = ((uint8_t)x19 & 0x3f);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { // out = 0 - in1 on four 64-bit limbs (index 0 least significant), then adds back the constant with limbs (0xffffffffffffffef, 0xffffffffffffffff, 0xffffffffffffffff, 0x3f) = 2^198 - 17, ANDed with the mask x19
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // 0 - in1 starting at limb 0; each returned borrow (x9, x12, x15) is the borrow-in of the next limb
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the borrow out of the top limb
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask that is either 0 or all ones, chosen by x18; cmovznz is defined outside this hunk - presumably all ones when x18 is nonzero (TODO confirm)
+ { uint64_t x20 = (x19 & 0xffffffffffffffefL); // limb 0 of the constant, masked
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant to the difference, carry chained through x23, x27, x31
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint8_t x32 = ((uint8_t)x19 & 0x3f); // top limb of the constant is 0x3f, so the mask is narrowed to 8 bits before the AND
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top limb is discarded
+ out[0] = x22; // results are stored least-significant limb first
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e198m17/fesub.c b/src/Specific/montgomery64_2e198m17/fesub.c
index 5e0aee748..fb1f51dfd 100644
--- a/src/Specific/montgomery64_2e198m17/fesub.c
+++ b/src/Specific/montgomery64_2e198m17/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffefL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint8_t x41 = ((uint8_t)x28 & 0x3f);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 - in2 on four 64-bit limbs (index 0 least significant), then adds back the constant with limbs (0xffffffffffffffef, 0xffffffffffffffff, 0xffffffffffffffff, 0x3f) = 2^198 - 17, ANDed with the mask x28
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // limb-wise in1 - in2 starting at limb 0; each returned borrow (x18, x21, x24) is the borrow-in of the next limb
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the top limb
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask that is either 0 or all ones, chosen by x27; cmovznz is defined outside this hunk - presumably all ones when x27 is nonzero (TODO confirm)
+ { uint64_t x29 = (x28 & 0xffffffffffffffefL); // limb 0 of the constant, masked
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant to the difference, carry chained through x32, x36, x40
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint8_t x41 = ((uint8_t)x28 & 0x3f); // top limb of the constant is 0x3f, so the mask is narrowed to 8 bits before the AND
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top limb is discarded
+ out[0] = x31; // results are stored least-significant limb first
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c b/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c
index 5757066bc..6b0d0bae7 100644
--- a/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x14bf, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 + in2 on four 64-bit limbs (index 0 least significant), then one conditional subtraction of the constant with limbs (0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x14bf) = 2^205 - 45*2^198 - 1
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // limb-wise add starting at limb 0; each returned carry (x18, x21, x24) is the carry-in of the next limb
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top limb
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the constant from the sum, borrow chained through x30, x33, x36, x39
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x14bf, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the borrow chain over the add carry x27; the difference word goes to the throwaway `_`, only the final borrow x42 is used
+ { uint64_t x43 = cmovznz(x42, x38, x26); // per limb, x42 chooses between the subtracted word and the plain sum; cmovznz is defined outside this hunk - presumably it yields the third argument when x42 is nonzero (TODO confirm)
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; // results are stored least-significant limb first (x46 comes from the limb-0 words x29/x17)
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/femul.c b/src/Specific/montgomery64_2e205m45x2e198m1/femul.c
index 19f690c56..035e6263e 100644
--- a/src/Specific/montgomery64_2e205m45x2e198m1/femul.c
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/femul.c
@@ -1,136 +1,126 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0x14bf, &x51);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
-{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
-{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
-{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
-{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0x14bf, &x129);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
-{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
-{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
-{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
-{ uint8_t x157 = (x156 + x117);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
-{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199);
-{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
-{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0x14bf, &x208);
-{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
-{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
-{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
-{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
-{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
-{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
-{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
-{ uint8_t x236 = (x235 + x196);
-{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239);
-{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
-{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
-{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
-{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
-{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
-{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
-{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278);
-{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
-{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
-{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0x14bf, &x287);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
-{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
-{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
-{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
-{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
-{ uint8_t x315 = (x314 + x275);
-{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317);
-{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
-{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
-{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0x14bf, &x326);
-{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_);
-{ uint64_t x331 = cmovznz(x330, x326, x313);
-{ uint64_t x332 = cmovznz(x330, x323, x310);
-{ uint64_t x333 = cmovznz(x330, x320, x307);
-{ uint64_t x334 = cmovznz(x330, x317, x304);
-out[0] = x331;
-out[1] = x332;
-out[2] = x333;
-out[3] = x334;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0x14bf, &x51);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+ { uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
+ { uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+ { uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0x14bf, &x129);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+ { uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+ { uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+ { uint8_t x157 = (x156 + x117);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+ { uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199);
+ { uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
+ { uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0x14bf, &x208);
+ { uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+ { uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+ { uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+ { uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+ { uint8_t x236 = (x235 + x196);
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239);
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+ { uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+ { uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278);
+ { uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0x14bf, &x287);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+ { uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+ { uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+ { uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+ { uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+ { uint8_t x315 = (x314 + x275);
+ { uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317);
+ { uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
+ { uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+ { uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0x14bf, &x326);
+ { uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_);
+ { uint64_t x331 = cmovznz(x330, x326, x313);
+ { uint64_t x332 = cmovznz(x330, x323, x310);
+ { uint64_t x333 = cmovznz(x330, x320, x307);
+ { uint64_t x334 = cmovznz(x330, x317, x304);
+ out[0] = x334;
+ out[1] = x333;
+ out[2] = x332;
+ out[3] = x331;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c b/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c b/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c
index 69835ea8e..ef25b23b0 100644
--- a/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x14bf);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x14bf);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c b/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c
index e25106ebb..32970f5be 100644
--- a/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x14bf);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x14bf);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e206m5/feadd.c b/src/Specific/montgomery64_2e206m5/feadd.c
index fe9a1b3ac..9d50cdc18 100644
--- a/src/Specific/montgomery64_2e206m5/feadd.c
+++ b/src/Specific/montgomery64_2e206m5/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffbL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffbL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e206m5/femul.c b/src/Specific/montgomery64_2e206m5/femul.c
index fdc1d8608..c0121407f 100644
--- a/src/Specific/montgomery64_2e206m5/femul.c
+++ b/src/Specific/montgomery64_2e206m5/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcccccccccccccccdL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffbL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcccccccccccccccdL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffbL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcccccccccccccccdL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffbL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcccccccccccccccdL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffbL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffbL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcccccccccccccccdL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffbL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcccccccccccccccdL, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffbL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcccccccccccccccdL, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffbL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcccccccccccccccdL, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffbL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffbL, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e206m5/fenz.c b/src/Specific/montgomery64_2e206m5/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e206m5/fenz.c
+++ b/src/Specific/montgomery64_2e206m5/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e206m5/feopp.c b/src/Specific/montgomery64_2e206m5/feopp.c
index b1b027290..14d8541e4 100644
--- a/src/Specific/montgomery64_2e206m5/feopp.c
+++ b/src/Specific/montgomery64_2e206m5/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffffbL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x3fff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffffbL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x3fff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e206m5/fesub.c b/src/Specific/montgomery64_2e206m5/fesub.c
index 1d752a013..be1730986 100644
--- a/src/Specific/montgomery64_2e206m5/fesub.c
+++ b/src/Specific/montgomery64_2e206m5/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffffbL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x3fff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xfffffffffffffffbL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x3fff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e212m29/feadd.c b/src/Specific/montgomery64_2e212m29/feadd.c
index 65a6b69be..858118997 100644
--- a/src/Specific/montgomery64_2e212m29/feadd.c
+++ b/src/Specific/montgomery64_2e212m29/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffe3L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xfffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffe3L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xfffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e212m29/femul.c b/src/Specific/montgomery64_2e212m29/femul.c
index a021c5c22..4c1c11e33 100644
--- a/src/Specific/montgomery64_2e212m29/femul.c
+++ b/src/Specific/montgomery64_2e212m29/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x34f72c234f72c235, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffe3L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xfffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x34f72c234f72c235, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffe3L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xfffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x34f72c234f72c235, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffe3L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xfffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x34f72c234f72c235, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffe3L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xfffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffe3L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xfffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x34f72c234f72c235, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffe3L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xfffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x34f72c234f72c235, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffe3L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xfffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x34f72c234f72c235, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffe3L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xfffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x34f72c234f72c235, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffe3L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xfffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffe3L, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xfffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e212m29/fenz.c b/src/Specific/montgomery64_2e212m29/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e212m29/fenz.c
+++ b/src/Specific/montgomery64_2e212m29/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e212m29/feopp.c b/src/Specific/montgomery64_2e212m29/feopp.c
index 83b1091d5..c379243c6 100644
--- a/src/Specific/montgomery64_2e212m29/feopp.c
+++ b/src/Specific/montgomery64_2e212m29/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffe3L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0xfffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffe3L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0xfffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e212m29/fesub.c b/src/Specific/montgomery64_2e212m29/fesub.c
index 1bee2bc99..b606ceaac 100644
--- a/src/Specific/montgomery64_2e212m29/fesub.c
+++ b/src/Specific/montgomery64_2e212m29/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffe3L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0xfffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // Subtraction on four 64-bit limbs: computes in1 - in2, then adds (mask & constant) limb by limb and stores the four result limbs in out.
+ { const uint64_t x8 = in1[3]; // limb consumed last by the borrow chain (most significant)
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // limb consumed first by the borrow chain (least significant)
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // x17, x20, x23, x26 = in1 - in2 limb by limb; assuming Intel intrinsic semantics (a - b - borrow_in), each returned borrow feeds the next call
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the top limb
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask derived from the final borrow; presumably 0 when x27 == 0 and all-ones otherwise -- confirm against cmovznz's definition
+ { uint64_t x29 = (x28 & 0xffffffffffffffe3L); // the four masked constants (x29, x33, x37, x41), read as little-endian 64-bit limbs, equal 2^212 - 29
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant to the difference, carry chained from limb 0 upward
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0xfffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top limb is stored in `_` and never read
+ out[0] = x31; // limbs are written in input order: index 0 is the limb at the start of the carry chain
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e213m3/feadd.c b/src/Specific/montgomery64_2e213m3/feadd.c
index 39bcecccd..4c4261e60 100644
--- a/src/Specific/montgomery64_2e213m3/feadd.c
+++ b/src/Specific/montgomery64_2e213m3/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffdL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x1fffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // Addition on four 64-bit limbs: computes in1 + in2, trial-subtracts a fixed constant, and stores a per-limb selection between the two candidates in out.
+ { const uint64_t x8 = in1[3]; // limb consumed last by the carry chain (most significant)
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // limb consumed first by the carry chain (least significant)
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // x17, x20, x23, x26 = in1 + in2 limb by limb; each returned carry feeds the next call
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top limb
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffdL, &x29); // trial subtraction: the four constants, read as little-endian 64-bit limbs, equal 2^213 - 3
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x1fffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the borrow chain through the carry word x27; the difference goes to `_` and only the borrow x42 is used
+ { uint64_t x43 = cmovznz(x42, x38, x26); // per-limb choice between the subtracted (x38) and unsubtracted (x26) value keyed on x42; presumably the subtracted one when x42 == 0 -- confirm against cmovznz's definition
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; // out[0] receives the limb from the start of the carry chain, out[3] the limb from its end
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e213m3/femul.c b/src/Specific/montgomery64_2e213m3/femul.c
index ed8e86b46..0879300f2 100644
--- a/src/Specific/montgomery64_2e213m3/femul.c
+++ b/src/Specific/montgomery64_2e213m3/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffdL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x1fffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffdL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x1fffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffdL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x1fffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffdL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x1fffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffdL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x1fffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // Four-limb multiply with an interleaved reduction step after each limb of in1; the directory name suggests word-by-word Montgomery multiplication modulo 2^213 - 3 -- confirm against the generator's specification.
+ { const uint64_t x8 = in1[3]; // multiplier limb for round 4
+ { const uint64_t x9 = in1[2]; // multiplier limb for round 3
+ { const uint64_t x7 = in1[1]; // multiplier limb for round 2
+ { const uint64_t x5 = in1[0]; // multiplier limb for round 1
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // Round 1: in1[0] times each limb of in2; assuming Intel _mulx_u64 semantics, the return value is the low half and the high half is stored through the pointer
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // fold the partial products into the words x17 (low), x29, x32, x35, x38 (high)
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word = last high half plus the pending carry; the carry out is stored in `_` and never read
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaaaaaaaaaaaaaaabL, &_); // x41 = x17 * 0xaaaaaaaaaaaaaaab with the high half discarded; note 0xaaaaaaaaaaaaaaab * 0xfffffffffffffffd == -1 (mod 2^64)
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffdL, &x45); // x41 times the four constant limbs, which read as little-endian 64-bit limbs equal 2^213 - 3
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x1fffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // fold into the words x44 (low), x56, x59, x62, x65 (high)
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // add the two 5-word values; the low-word sum goes to `_` (it is 0 mod 2^64 by the choice of x41, given the _mulx_u64 assumption above) and only its carry x69 continues
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // running value after round 1: x71, x74, x77, x80 plus carry x81
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // Round 2: in1[1] times each limb of in2
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add this round's product to the running value x71, x74, x77, x80, x81
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaaaaaaaaaaaaaaabL, &_); // same reduction step as round 1, now keyed on the low word x107
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffdL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x1fffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low-word sum discarded again; only the carry x150 continues
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120); // combines the carry-outs of this round's two additions into one top word
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // Round 3: in1[2] times each limb of in2
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189); // add this round's product to the running value x152, x155, x158, x161, x163
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaaaaaaaaaaaaaaabL, &_); // reduction step keyed on the low word x189
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffdL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x1fffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // low-word sum discarded; only the carry x232 continues
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202); // combines the carry-outs of this round's two additions into one top word
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // Round 4: in1[3] times each limb of in2
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271); // add this round's product to the running value x234, x237, x240, x243, x245
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaaaaaaaaaaaaaaabL, &_); // reduction step keyed on the low word x271
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffdL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x1fffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // low-word sum discarded; only the carry x314 continues
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284); // top word of the final running value x316, x319, x322, x325, x327
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffdL, &x329); // trial subtraction of the constant limbs (2^213 - 3) from the running value
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x1fffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // extends the borrow chain through the top word x327; the difference goes to `_` and only the borrow x342 is used
+ { uint64_t x343 = cmovznz(x342, x338, x325); // per-limb choice between the subtracted (x338) and unsubtracted (x325) value keyed on x342; presumably the subtracted one when x342 == 0 -- confirm against cmovznz's definition
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346; // out[0] receives the lowest surviving word of the running value, out[3] the highest
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e213m3/fenz.c b/src/Specific/montgomery64_2e213m3/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e213m3/fenz.c
+++ b/src/Specific/montgomery64_2e213m3/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) { // Stores in out[0] the bitwise OR of the four limbs of in1, so out[0] is nonzero exactly when some limb is nonzero. NOTE(review): `ReturnType` is not a C keyword or <stdint.h> name and is not defined in the visible source; presumably a macro or a code-generator artifact -- confirm this compiles.
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); // OR is accumulated from in1[3] down to in1[0]
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9; // raw OR of the limbs, not normalized to 0/1
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e213m3/feopp.c b/src/Specific/montgomery64_2e213m3/feopp.c
index 8ea13e52a..729136126 100644
--- a/src/Specific/montgomery64_2e213m3/feopp.c
+++ b/src/Specific/montgomery64_2e213m3/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffffdL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x1fffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { // Negation on four 64-bit limbs: computes 0 - in1, then adds (mask & constant) limb by limb and stores the four result limbs in out.
+ { const uint64_t x5 = in1[3]; // limb consumed last by the borrow chain (most significant)
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; // limb consumed first by the borrow chain (least significant)
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // x8, x11, x14, x17 = 0 - in1 limb by limb; assuming Intel intrinsic semantics (a - b - borrow_in), each returned borrow feeds the next call
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the borrow out of the top limb
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask derived from the final borrow; presumably 0 when x18 == 0 and all-ones otherwise -- confirm against cmovznz's definition
+ { uint64_t x20 = (x19 & 0xfffffffffffffffdL); // the four masked constants (x20, x24, x28, x32), read as little-endian 64-bit limbs, equal 2^213 - 3
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant to the difference, carry chained from limb 0 upward
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x1fffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top limb is stored in `_` and never read
+ out[0] = x22; // limbs are written in input order: index 0 is the limb at the start of the carry chain
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e213m3/fesub.c b/src/Specific/montgomery64_2e213m3/fesub.c
index 1fd39286e..fb27eed38 100644
--- a/src/Specific/montgomery64_2e213m3/fesub.c
+++ b/src/Specific/montgomery64_2e213m3/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffffdL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x1fffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // Subtraction on four 64-bit limbs: computes in1 - in2, then adds (mask & constant) limb by limb and stores the four result limbs in out.
+ { const uint64_t x8 = in1[3]; // limb consumed last by the borrow chain (most significant)
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // limb consumed first by the borrow chain (least significant)
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // x17, x20, x23, x26 = in1 - in2 limb by limb; assuming Intel intrinsic semantics (a - b - borrow_in), each returned borrow feeds the next call
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the top limb
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask derived from the final borrow; presumably 0 when x27 == 0 and all-ones otherwise -- confirm against cmovznz's definition
+ { uint64_t x29 = (x28 & 0xfffffffffffffffdL); // the four masked constants (x29, x33, x37, x41), read as little-endian 64-bit limbs, equal 2^213 - 3
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant to the difference, carry chained from limb 0 upward
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x1fffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top limb is stored in `_` and never read
+ out[0] = x31; // limbs are written in input order: index 0 is the limb at the start of the carry chain
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e216m2e108m1/feadd.c b/src/Specific/montgomery64_2e216m2e108m1/feadd.c
index 2ca6e6702..20d319c23 100644
--- a/src/Specific/montgomery64_2e216m2e108m1/feadd.c
+++ b/src/Specific/montgomery64_2e216m2e108m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffefffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffefffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e216m2e108m1/femul.c b/src/Specific/montgomery64_2e216m2e108m1/femul.c
index 837a92173..8b4974b21 100644
--- a/src/Specific/montgomery64_2e216m2e108m1/femul.c
+++ b/src/Specific/montgomery64_2e216m2e108m1/femul.c
@@ -1,136 +1,126 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffefffffffffffL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0xffffff, &x51);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
-{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
-{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
-{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
-{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffefffffffffffL, &x123);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0xffffff, &x129);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
-{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
-{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
-{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
-{ uint8_t x157 = (x156 + x117);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
-{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199);
-{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffefffffffffffL, &x202);
-{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0xffffff, &x208);
-{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
-{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
-{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
-{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
-{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
-{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
-{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
-{ uint8_t x236 = (x235 + x196);
-{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239);
-{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
-{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
-{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
-{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
-{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
-{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
-{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278);
-{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffefffffffffffL, &x281);
-{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
-{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0xffffff, &x287);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
-{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
-{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
-{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
-{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
-{ uint8_t x315 = (x314 + x275);
-{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317);
-{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffefffffffffffL, &x320);
-{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
-{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0xffffff, &x326);
-{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_);
-{ uint64_t x331 = cmovznz(x330, x326, x313);
-{ uint64_t x332 = cmovznz(x330, x323, x310);
-{ uint64_t x333 = cmovznz(x330, x320, x307);
-{ uint64_t x334 = cmovznz(x330, x317, x304);
-out[0] = x331;
-out[1] = x332;
-out[2] = x333;
-out[3] = x334;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffefffffffffffL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0xffffff, &x51);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+ { uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
+ { uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+ { uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffefffffffffffL, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0xffffff, &x129);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+ { uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+ { uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+ { uint8_t x157 = (x156 + x117);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+ { uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199);
+ { uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffefffffffffffL, &x202);
+ { uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0xffffff, &x208);
+ { uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+ { uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+ { uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+ { uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+ { uint8_t x236 = (x235 + x196);
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239);
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+ { uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+ { uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278);
+ { uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffefffffffffffL, &x281);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0xffffff, &x287);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+ { uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+ { uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+ { uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+ { uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+ { uint8_t x315 = (x314 + x275);
+ { uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317);
+ { uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffefffffffffffL, &x320);
+ { uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+ { uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0xffffff, &x326);
+ { uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_);
+ { uint64_t x331 = cmovznz(x330, x326, x313);
+ { uint64_t x332 = cmovznz(x330, x323, x310);
+ { uint64_t x333 = cmovznz(x330, x320, x307);
+ { uint64_t x334 = cmovznz(x330, x317, x304);
+ out[0] = x334;
+ out[1] = x333;
+ out[2] = x332;
+ out[3] = x331;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e216m2e108m1/fenz.c b/src/Specific/montgomery64_2e216m2e108m1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e216m2e108m1/fenz.c
+++ b/src/Specific/montgomery64_2e216m2e108m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e216m2e108m1/feopp.c b/src/Specific/montgomery64_2e216m2e108m1/feopp.c
index 4a2e77a0c..5f1bb5030 100644
--- a/src/Specific/montgomery64_2e216m2e108m1/feopp.c
+++ b/src/Specific/montgomery64_2e216m2e108m1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffefffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0xffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffefffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0xffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e216m2e108m1/fesub.c b/src/Specific/montgomery64_2e216m2e108m1/fesub.c
index fdaea4034..c9d9565dc 100644
--- a/src/Specific/montgomery64_2e216m2e108m1/fesub.c
+++ b/src/Specific/montgomery64_2e216m2e108m1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffefffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0xffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffefffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0xffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e221m3/feadd.c b/src/Specific/montgomery64_2e221m3/feadd.c
index 77bc256ad..4be18d8bc 100644
--- a/src/Specific/montgomery64_2e221m3/feadd.c
+++ b/src/Specific/montgomery64_2e221m3/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffdL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x1fffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffdL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x1fffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e221m3/femul.c b/src/Specific/montgomery64_2e221m3/femul.c
index 2ba283618..2e1413c75 100644
--- a/src/Specific/montgomery64_2e221m3/femul.c
+++ b/src/Specific/montgomery64_2e221m3/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffdL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x1fffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffdL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x1fffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffdL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x1fffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffdL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x1fffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffdL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x1fffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // word-by-word Montgomery multiplication modulo p = 2^221 - 3 on four 64-bit limbs (index 0 least significant); presumably out = in1 * in2 * 2^-256 mod p for inputs below p - generated code, confirm against the generator's spec
+ { const uint64_t x8 = in1[3]; // top limb of in1
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // bottom limb of in1
+ { const uint64_t x14 = in2[3]; // top limb of in2
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; // bottom limb of in2
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: in1[0] times each limb of in2; _mulx_u64 returns the low 64 bits and stores the high 64 bits through the pointer
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // merge overlapping high/low halves; the partial product's limbs are x17 (lowest), x29, x32, x35, x38
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // fold the last carry x36 into the top half-product; the carry-out is not used
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor x41 = x17 * 0xaaaaaaaaaaaaaaab mod 2^64; the constant is 3^-1 mod 2^64, i.e. -p^-1 mod 2^64
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffdL, &x45); // x41 * p, one limb of p at a time (limbs of p: 0xfffffffffffffffd, 0xffffffffffffffff, 0xffffffffffffffff, 0x1fffffff)
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x1fffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // merge the halves of x41 * p into limbs x44 (lowest), x56, x59, x62, x65
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // x17 + x44 is 0 mod 2^64 by choice of x41, so the limb is discarded and only its carry x69 is kept
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // accumulator after round 1: x71, x74, x77, x80 plus carry x81
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: in1[1] times each limb of in2
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add this round's partial product into the accumulator
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 2, derived from the accumulator's lowest limb
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffdL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x1fffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest limb cancels again; keep only the carry
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120); // combine the two top-level carries of this round into the accumulator's extra limb
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: in1[2] times each limb of in2
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 3
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffdL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x1fffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: in1[3] times each limb of in2
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 4
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffdL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x1fffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284); // extra top limb of the final accumulator x316, x319, x322, x325
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffdL, &x329); // trial subtraction: x329, x332, x335, x338 are the limbs of (accumulator - p)
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x1fffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // x342 = final borrow, taking the extra limb x327 into account
+ { uint64_t x343 = cmovznz(x342, x338, x325); // per-limb choice between (accumulator - p) and the accumulator, keyed on x342; cmovznz is defined outside this file - presumably yields its 2nd argument when x342 == 0 and its 3rd otherwise, TODO confirm
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346; // results are stored bottom limb first
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e221m3/fenz.c b/src/Specific/montgomery64_2e221m3/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e221m3/fenz.c
+++ b/src/Specific/montgomery64_2e221m3/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[4]) { // non-zero test: writes the bitwise OR of the four limbs of in1 to out[0], so out[0] == 0 exactly when every limb is zero
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); // OR the limbs together, top limbs first
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8); // x9 has a bit set wherever any limb of in1 does
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e221m3/feopp.c b/src/Specific/montgomery64_2e221m3/feopp.c
index 2a8bfeaee..0c0255d93 100644
--- a/src/Specific/montgomery64_2e221m3/feopp.c
+++ b/src/Specific/montgomery64_2e221m3/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffffdL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x1fffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { // negation modulo p = 2^221 - 3: computes 0 - in1 and adds p back when that subtraction borrows; operands are four 64-bit limbs with index 0 least significant
+ { const uint64_t x5 = in1[3]; // top limb of in1
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; // bottom limb of in1
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // subtract-with-borrow chain, bottom limb first: x8, x11, x14, x17 are the limbs of (0 - in1)
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = final borrow
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask keyed on the borrow; cmovznz is defined outside this file - presumably 0 when x18 == 0 and all-ones otherwise, TODO confirm
+ { uint64_t x20 = (x19 & 0xfffffffffffffffdL); // bottom limb of p, or 0 when the mask is 0
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add-with-carry chain: add the masked p to the difference
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x1fffffff); // top limb of p, or 0 when the mask is 0
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the carry out of the top limb is not used
+ out[0] = x22; // results are stored bottom limb first
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e221m3/fesub.c b/src/Specific/montgomery64_2e221m3/fesub.c
index e2f5ff86c..f0106fd81 100644
--- a/src/Specific/montgomery64_2e221m3/fesub.c
+++ b/src/Specific/montgomery64_2e221m3/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffffdL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x1fffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // subtraction modulo p = 2^221 - 3: computes in1 - in2 and adds p back when that subtraction borrows; operands are four 64-bit limbs with index 0 least significant
+ { const uint64_t x8 = in1[3]; // top limb of in1
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // bottom limb of in1
+ { const uint64_t x14 = in2[3]; // top limb of in2
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; // bottom limb of in2
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // subtract-with-borrow chain, bottom limb first: x17, x20, x23, x26 are the limbs of (in1 - in2)
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = final borrow
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask keyed on the borrow; cmovznz is defined outside this file - presumably 0 when x27 == 0 and all-ones otherwise, TODO confirm
+ { uint64_t x29 = (x28 & 0xfffffffffffffffdL); // bottom limb of p, or 0 when the mask is 0
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add-with-carry chain: add the masked p to the difference
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x1fffffff); // top limb of p, or 0 when the mask is 0
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the carry out of the top limb is not used
+ out[0] = x31; // results are stored bottom limb first
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e222m117/feadd.c b/src/Specific/montgomery64_2e222m117/feadd.c
index f812c899d..e1cc8be53 100644
--- a/src/Specific/montgomery64_2e222m117/feadd.c
+++ b/src/Specific/montgomery64_2e222m117/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffff8bL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // adds in1 and in2, then conditionally subtracts p = 2^222 - 117 once; operands are four 64-bit limbs with index 0 least significant; inputs are presumably already below p (not checked here)
+ { const uint64_t x8 = in1[3]; // top limb of in1
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // bottom limb of in1
+ { const uint64_t x14 = in2[3]; // top limb of in2
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; // bottom limb of in2
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // add-with-carry chain, bottom limb first: x17, x20, x23, x26 are the limbs of in1 + in2
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the top limb
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffff8bL, &x29); // subtract-with-borrow chain: x29, x32, x35, x38 are the limbs of (sum - p); this constant (2^64 - 117) is the bottom limb of p
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fffffff, &x38); // 0x3fffffff is the top limb of p
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // x42 = final borrow, with the carry x27 acting as a fifth limb; the difference itself is discarded
+ { uint64_t x43 = cmovznz(x42, x38, x26); // per-limb choice between (sum - p) and the raw sum, keyed on x42; cmovznz is defined outside this file - presumably yields its 2nd argument when x42 == 0 and its 3rd otherwise, TODO confirm
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; // results are stored bottom limb first
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e222m117/femul.c b/src/Specific/montgomery64_2e222m117/femul.c
index 052e9c5e4..91ca1c408 100644
--- a/src/Specific/montgomery64_2e222m117/femul.c
+++ b/src/Specific/montgomery64_2e222m117/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcfdcfdcfdcfdcfddL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffff8bL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcfdcfdcfdcfdcfddL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffff8bL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcfdcfdcfdcfdcfddL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffff8bL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcfdcfdcfdcfdcfddL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffff8bL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffff8bL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcfdcfdcfdcfdcfddL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffff8bL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcfdcfdcfdcfdcfddL, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffff8bL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcfdcfdcfdcfdcfddL, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffff8bL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcfdcfdcfdcfdcfddL, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffff8bL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffff8bL, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e222m117/fenz.c b/src/Specific/montgomery64_2e222m117/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e222m117/fenz.c
+++ b/src/Specific/montgomery64_2e222m117/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e222m117/feopp.c b/src/Specific/montgomery64_2e222m117/feopp.c
index 673d57575..28602d0a9 100644
--- a/src/Specific/montgomery64_2e222m117/feopp.c
+++ b/src/Specific/montgomery64_2e222m117/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffff8bL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x3fffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffff8bL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x3fffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e222m117/fesub.c b/src/Specific/montgomery64_2e222m117/fesub.c
index 9d08573c6..8f43c55a0 100644
--- a/src/Specific/montgomery64_2e222m117/fesub.c
+++ b/src/Specific/montgomery64_2e222m117/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffff8bL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x3fffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffff8bL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x3fffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e224m2e96p1/feadd.c b/src/Specific/montgomery64_2e224m2e96p1/feadd.c
index 963bba047..cbe89cc4b 100644
--- a/src/Specific/montgomery64_2e224m2e96p1/feadd.c
+++ b/src/Specific/montgomery64_2e224m2e96p1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0x1, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffff00000000L, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0x1, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffff00000000L, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e224m2e96p1/femul.c b/src/Specific/montgomery64_2e224m2e96p1/femul.c
index 9e0c971d6..16bd42b73 100644
--- a/src/Specific/montgomery64_2e224m2e96p1/femul.c
+++ b/src/Specific/montgomery64_2e224m2e96p1/femul.c
@@ -1,132 +1,122 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffff00000000L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffff, &x51);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x45, x47, &x53);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x48, x50, &x56);
-{ uint64_t x59; uint8_t _ = _addcarryx_u64(0x0, x57, x51, &x59);
-{ uint64_t _; uint8_t x63 = _addcarryx_u64(0x0, x17, x41, &_);
-{ uint64_t x65; uint8_t x66 = _addcarryx_u64(x63, x29, x44, &x65);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x32, x53, &x68);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x35, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x38, x59, &x74);
-{ uint64_t x78; uint64_t x77 = _mulx_u64(x7, x11, &x78);
-{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x13, &x81);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x15, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x14, &x87);
-{ uint64_t x89; uint8_t x90 = _addcarryx_u64(0x0, x78, x80, &x89);
-{ uint64_t x92; uint8_t x93 = _addcarryx_u64(x90, x81, x83, &x92);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
-{ uint64_t x98; uint8_t _ = _addcarryx_u64(0x0, x96, x87, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(0x0, x65, x77, &x101);
-{ uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x68, x89, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x75, x98, &x113);
-{ uint64_t _; uint64_t x116 = _mulx_u64(x101, 0xffffffffffffffffL, &_);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x116, 0xffffffff00000000L, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x116, 0xffffffffffffffffL, &x123);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x116, 0xffffffff, &x126);
-{ uint64_t x128; uint8_t x129 = _addcarryx_u64(0x0, x120, x122, &x128);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(x129, x123, x125, &x131);
-{ uint64_t x134; uint8_t _ = _addcarryx_u64(0x0, x132, x126, &x134);
-{ uint64_t _; uint8_t x138 = _addcarryx_u64(0x0, x101, x116, &_);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x104, x119, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x107, x128, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x110, x131, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x113, x134, &x149);
-{ uint8_t x151 = (x150 + x114);
-{ uint64_t x154; uint64_t x153 = _mulx_u64(x9, x11, &x154);
-{ uint64_t x157; uint64_t x156 = _mulx_u64(x9, x13, &x157);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x15, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x14, &x163);
-{ uint64_t x165; uint8_t x166 = _addcarryx_u64(0x0, x154, x156, &x165);
-{ uint64_t x168; uint8_t x169 = _addcarryx_u64(x166, x157, x159, &x168);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x160, x162, &x171);
-{ uint64_t x174; uint8_t _ = _addcarryx_u64(0x0, x172, x163, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x140, x153, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x143, x165, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x146, x168, &x183);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x151, x174, &x189);
-{ uint64_t _; uint64_t x192 = _mulx_u64(x177, 0xffffffffffffffffL, &_);
-{ uint64_t x196; uint64_t x195 = _mulx_u64(x192, 0xffffffff00000000L, &x196);
-{ uint64_t x199; uint64_t x198 = _mulx_u64(x192, 0xffffffffffffffffL, &x199);
-{ uint64_t x202; uint64_t x201 = _mulx_u64(x192, 0xffffffff, &x202);
-{ uint64_t x204; uint8_t x205 = _addcarryx_u64(0x0, x196, x198, &x204);
-{ uint64_t x207; uint8_t x208 = _addcarryx_u64(x205, x199, x201, &x207);
-{ uint64_t x210; uint8_t _ = _addcarryx_u64(0x0, x208, x202, &x210);
-{ uint64_t _; uint8_t x214 = _addcarryx_u64(0x0, x177, x192, &_);
-{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x180, x195, &x216);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x183, x204, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x186, x207, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x189, x210, &x225);
-{ uint8_t x227 = (x226 + x190);
-{ uint64_t x230; uint64_t x229 = _mulx_u64(x8, x11, &x230);
-{ uint64_t x233; uint64_t x232 = _mulx_u64(x8, x13, &x233);
-{ uint64_t x236; uint64_t x235 = _mulx_u64(x8, x15, &x236);
-{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x14, &x239);
-{ uint64_t x241; uint8_t x242 = _addcarryx_u64(0x0, x230, x232, &x241);
-{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x233, x235, &x244);
-{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x236, x238, &x247);
-{ uint64_t x250; uint8_t _ = _addcarryx_u64(0x0, x248, x239, &x250);
-{ uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x216, x229, &x253);
-{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x219, x241, &x256);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x222, x244, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x225, x247, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x227, x250, &x265);
-{ uint64_t _; uint64_t x268 = _mulx_u64(x253, 0xffffffffffffffffL, &_);
-{ uint64_t x272; uint64_t x271 = _mulx_u64(x268, 0xffffffff00000000L, &x272);
-{ uint64_t x275; uint64_t x274 = _mulx_u64(x268, 0xffffffffffffffffL, &x275);
-{ uint64_t x278; uint64_t x277 = _mulx_u64(x268, 0xffffffff, &x278);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(0x0, x272, x274, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x275, x277, &x283);
-{ uint64_t x286; uint8_t _ = _addcarryx_u64(0x0, x284, x278, &x286);
-{ uint64_t _; uint8_t x290 = _addcarryx_u64(0x0, x253, x268, &_);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x256, x271, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x259, x280, &x295);
-{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x262, x283, &x298);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x265, x286, &x301);
-{ uint8_t x303 = (x302 + x266);
-{ uint64_t x305; uint8_t x306 = _subborrow_u64(0x0, x292, 0x1, &x305);
-{ uint64_t x308; uint8_t x309 = _subborrow_u64(x306, x295, 0xffffffff00000000L, &x308);
-{ uint64_t x311; uint8_t x312 = _subborrow_u64(x309, x298, 0xffffffffffffffffL, &x311);
-{ uint64_t x314; uint8_t x315 = _subborrow_u64(x312, x301, 0xffffffff, &x314);
-{ uint64_t _; uint8_t x318 = _subborrow_u64(x315, x303, 0x0, &_);
-{ uint64_t x319 = cmovznz(x318, x314, x301);
-{ uint64_t x320 = cmovznz(x318, x311, x298);
-{ uint64_t x321 = cmovznz(x318, x308, x295);
-{ uint64_t x322 = cmovznz(x318, x305, x292);
-out[0] = x319;
-out[1] = x320;
-out[2] = x321;
-out[3] = x322;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffff00000000L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffff, &x51);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x45, x47, &x53);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x48, x50, &x56);
+ { uint64_t x59; uint8_t _ = _addcarryx_u64(0x0, x57, x51, &x59);
+ { uint64_t _; uint8_t x63 = _addcarryx_u64(0x0, x17, x41, &_);
+ { uint64_t x65; uint8_t x66 = _addcarryx_u64(x63, x29, x44, &x65);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x32, x53, &x68);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x35, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x38, x59, &x74);
+ { uint64_t x78; uint64_t x77 = _mulx_u64(x7, x11, &x78);
+ { uint64_t x81; uint64_t x80 = _mulx_u64(x7, x13, &x81);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x15, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x14, &x87);
+ { uint64_t x89; uint8_t x90 = _addcarryx_u64(0x0, x78, x80, &x89);
+ { uint64_t x92; uint8_t x93 = _addcarryx_u64(x90, x81, x83, &x92);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+ { uint64_t x98; uint8_t _ = _addcarryx_u64(0x0, x96, x87, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(0x0, x65, x77, &x101);
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x68, x89, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x75, x98, &x113);
+ { uint64_t _; uint64_t x116 = _mulx_u64(x101, 0xffffffffffffffffL, &_);
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x116, 0xffffffff00000000L, &x120);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x116, 0xffffffffffffffffL, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x116, 0xffffffff, &x126);
+ { uint64_t x128; uint8_t x129 = _addcarryx_u64(0x0, x120, x122, &x128);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(x129, x123, x125, &x131);
+ { uint64_t x134; uint8_t _ = _addcarryx_u64(0x0, x132, x126, &x134);
+ { uint64_t _; uint8_t x138 = _addcarryx_u64(0x0, x101, x116, &_);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x104, x119, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x107, x128, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x110, x131, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x113, x134, &x149);
+ { uint8_t x151 = (x150 + x114);
+ { uint64_t x154; uint64_t x153 = _mulx_u64(x9, x11, &x154);
+ { uint64_t x157; uint64_t x156 = _mulx_u64(x9, x13, &x157);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x9, x15, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x9, x14, &x163);
+ { uint64_t x165; uint8_t x166 = _addcarryx_u64(0x0, x154, x156, &x165);
+ { uint64_t x168; uint8_t x169 = _addcarryx_u64(x166, x157, x159, &x168);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x160, x162, &x171);
+ { uint64_t x174; uint8_t _ = _addcarryx_u64(0x0, x172, x163, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x140, x153, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x143, x165, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x146, x168, &x183);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x151, x174, &x189);
+ { uint64_t _; uint64_t x192 = _mulx_u64(x177, 0xffffffffffffffffL, &_);
+ { uint64_t x196; uint64_t x195 = _mulx_u64(x192, 0xffffffff00000000L, &x196);
+ { uint64_t x199; uint64_t x198 = _mulx_u64(x192, 0xffffffffffffffffL, &x199);
+ { uint64_t x202; uint64_t x201 = _mulx_u64(x192, 0xffffffff, &x202);
+ { uint64_t x204; uint8_t x205 = _addcarryx_u64(0x0, x196, x198, &x204);
+ { uint64_t x207; uint8_t x208 = _addcarryx_u64(x205, x199, x201, &x207);
+ { uint64_t x210; uint8_t _ = _addcarryx_u64(0x0, x208, x202, &x210);
+ { uint64_t _; uint8_t x214 = _addcarryx_u64(0x0, x177, x192, &_);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x180, x195, &x216);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x183, x204, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x186, x207, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x189, x210, &x225);
+ { uint8_t x227 = (x226 + x190);
+ { uint64_t x230; uint64_t x229 = _mulx_u64(x8, x11, &x230);
+ { uint64_t x233; uint64_t x232 = _mulx_u64(x8, x13, &x233);
+ { uint64_t x236; uint64_t x235 = _mulx_u64(x8, x15, &x236);
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x8, x14, &x239);
+ { uint64_t x241; uint8_t x242 = _addcarryx_u64(0x0, x230, x232, &x241);
+ { uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x233, x235, &x244);
+ { uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x236, x238, &x247);
+ { uint64_t x250; uint8_t _ = _addcarryx_u64(0x0, x248, x239, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x216, x229, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x219, x241, &x256);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x222, x244, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x225, x247, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x227, x250, &x265);
+ { uint64_t _; uint64_t x268 = _mulx_u64(x253, 0xffffffffffffffffL, &_);
+ { uint64_t x272; uint64_t x271 = _mulx_u64(x268, 0xffffffff00000000L, &x272);
+ { uint64_t x275; uint64_t x274 = _mulx_u64(x268, 0xffffffffffffffffL, &x275);
+ { uint64_t x278; uint64_t x277 = _mulx_u64(x268, 0xffffffff, &x278);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(0x0, x272, x274, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x275, x277, &x283);
+ { uint64_t x286; uint8_t _ = _addcarryx_u64(0x0, x284, x278, &x286);
+ { uint64_t _; uint8_t x290 = _addcarryx_u64(0x0, x253, x268, &_);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x256, x271, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x259, x280, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x262, x283, &x298);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x265, x286, &x301);
+ { uint8_t x303 = (x302 + x266);
+ { uint64_t x305; uint8_t x306 = _subborrow_u64(0x0, x292, 0x1, &x305);
+ { uint64_t x308; uint8_t x309 = _subborrow_u64(x306, x295, 0xffffffff00000000L, &x308);
+ { uint64_t x311; uint8_t x312 = _subborrow_u64(x309, x298, 0xffffffffffffffffL, &x311);
+ { uint64_t x314; uint8_t x315 = _subborrow_u64(x312, x301, 0xffffffff, &x314);
+ { uint64_t _; uint8_t x318 = _subborrow_u64(x315, x303, 0x0, &_);
+ { uint64_t x319 = cmovznz(x318, x314, x301);
+ { uint64_t x320 = cmovznz(x318, x311, x298);
+ { uint64_t x321 = cmovznz(x318, x308, x295);
+ { uint64_t x322 = cmovznz(x318, x305, x292);
+ out[0] = x322;
+ out[1] = x321;
+ out[2] = x320;
+ out[3] = x319;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e224m2e96p1/fenz.c b/src/Specific/montgomery64_2e224m2e96p1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e224m2e96p1/fenz.c
+++ b/src/Specific/montgomery64_2e224m2e96p1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e224m2e96p1/feopp.c b/src/Specific/montgomery64_2e224m2e96p1/feopp.c
index ec5edf772..79c6bb9a1 100644
--- a/src/Specific/montgomery64_2e224m2e96p1/feopp.c
+++ b/src/Specific/montgomery64_2e224m2e96p1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint8_t x20 = ((uint8_t)x19 & 0x1);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffff00000000L);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0xffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint8_t x20 = ((uint8_t)x19 & 0x1);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffff00000000L);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0xffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e224m2e96p1/fesub.c b/src/Specific/montgomery64_2e224m2e96p1/fesub.c
index 454d1c390..30e07cb2f 100644
--- a/src/Specific/montgomery64_2e224m2e96p1/fesub.c
+++ b/src/Specific/montgomery64_2e224m2e96p1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint8_t x29 = ((uint8_t)x28 & 0x1);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffff00000000L);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0xffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint8_t x29 = ((uint8_t)x28 & 0x1);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffff00000000L);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0xffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e226m5/feadd.c b/src/Specific/montgomery64_2e226m5/feadd.c
index 4ab8f6968..0f9ca95c8 100644
--- a/src/Specific/montgomery64_2e226m5/feadd.c
+++ b/src/Specific/montgomery64_2e226m5/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffbL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3ffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffbL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3ffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e226m5/femul.c b/src/Specific/montgomery64_2e226m5/femul.c
index fb1de9174..c5c47b343 100644
--- a/src/Specific/montgomery64_2e226m5/femul.c
+++ b/src/Specific/montgomery64_2e226m5/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcccccccccccccccdL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffbL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3ffffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcccccccccccccccdL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffbL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3ffffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcccccccccccccccdL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffbL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3ffffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcccccccccccccccdL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffbL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3ffffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffbL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3ffffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcccccccccccccccdL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffbL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3ffffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcccccccccccccccdL, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffbL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3ffffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcccccccccccccccdL, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffbL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3ffffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcccccccccccccccdL, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffbL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3ffffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffbL, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3ffffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e226m5/fenz.c b/src/Specific/montgomery64_2e226m5/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e226m5/fenz.c
+++ b/src/Specific/montgomery64_2e226m5/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e226m5/feopp.c b/src/Specific/montgomery64_2e226m5/feopp.c
index edcbefbd9..3ad87f131 100644
--- a/src/Specific/montgomery64_2e226m5/feopp.c
+++ b/src/Specific/montgomery64_2e226m5/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffffbL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x3ffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffffbL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x3ffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e226m5/fesub.c b/src/Specific/montgomery64_2e226m5/fesub.c
index 6230b9891..e78610512 100644
--- a/src/Specific/montgomery64_2e226m5/fesub.c
+++ b/src/Specific/montgomery64_2e226m5/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffffbL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x3ffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xfffffffffffffffbL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x3ffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e230m27/feadd.c b/src/Specific/montgomery64_2e230m27/feadd.c
index b4afa6349..fea2534c3 100644
--- a/src/Specific/montgomery64_2e230m27/feadd.c
+++ b/src/Specific/montgomery64_2e230m27/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffe5L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffe5L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e230m27/femul.c b/src/Specific/montgomery64_2e230m27/femul.c
index bde43f07c..387681d81 100644
--- a/src/Specific/montgomery64_2e230m27/femul.c
+++ b/src/Specific/montgomery64_2e230m27/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x84bda12f684bda13L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffe5L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fffffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x84bda12f684bda13L, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffe5L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fffffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x84bda12f684bda13L, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffe5L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fffffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x84bda12f684bda13L, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffe5L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fffffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffe5L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fffffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x84bda12f684bda13L, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffe5L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fffffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x84bda12f684bda13L, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffe5L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fffffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x84bda12f684bda13L, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffe5L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fffffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x84bda12f684bda13L, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffe5L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fffffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffe5L, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fffffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e230m27/fenz.c b/src/Specific/montgomery64_2e230m27/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e230m27/fenz.c
+++ b/src/Specific/montgomery64_2e230m27/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e230m27/feopp.c b/src/Specific/montgomery64_2e230m27/feopp.c
index 8dc2603ae..721a54ccc 100644
--- a/src/Specific/montgomery64_2e230m27/feopp.c
+++ b/src/Specific/montgomery64_2e230m27/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffe5L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x3fffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffe5L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x3fffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e230m27/fesub.c b/src/Specific/montgomery64_2e230m27/fesub.c
index f593c02cc..4c915b813 100644
--- a/src/Specific/montgomery64_2e230m27/fesub.c
+++ b/src/Specific/montgomery64_2e230m27/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffe5L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x3fffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffffe5L);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x3fffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e235m15/feadd.c b/src/Specific/montgomery64_2e235m15/feadd.c
index eae9f264a..1d7e9c091 100644
--- a/src/Specific/montgomery64_2e235m15/feadd.c
+++ b/src/Specific/montgomery64_2e235m15/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff1L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff1L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e235m15/femul.c b/src/Specific/montgomery64_2e235m15/femul.c
index 007baab53..270793a9c 100644
--- a/src/Specific/montgomery64_2e235m15/femul.c
+++ b/src/Specific/montgomery64_2e235m15/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff1L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff1L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff1L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff1L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff1L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff1L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff1L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff1L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff1L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff1L, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e235m15/fenz.c b/src/Specific/montgomery64_2e235m15/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e235m15/fenz.c
+++ b/src/Specific/montgomery64_2e235m15/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e235m15/feopp.c b/src/Specific/montgomery64_2e235m15/feopp.c
index cc6fc1a86..96fd89b75 100644
--- a/src/Specific/montgomery64_2e235m15/feopp.c
+++ b/src/Specific/montgomery64_2e235m15/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffff1L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x7ffffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffff1L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x7ffffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e235m15/fesub.c b/src/Specific/montgomery64_2e235m15/fesub.c
index c160c373d..86d67d863 100644
--- a/src/Specific/montgomery64_2e235m15/fesub.c
+++ b/src/Specific/montgomery64_2e235m15/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffff1L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x7ffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xfffffffffffffff1L);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x7ffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e243m9/feadd.c b/src/Specific/montgomery64_2e243m9/feadd.c
index 82067f831..24c69e18c 100644
--- a/src/Specific/montgomery64_2e243m9/feadd.c
+++ b/src/Specific/montgomery64_2e243m9/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff7L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff7L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e243m9/femul.c b/src/Specific/montgomery64_2e243m9/femul.c
index f6ed60fc9..b9fd4460b 100644
--- a/src/Specific/montgomery64_2e243m9/femul.c
+++ b/src/Specific/montgomery64_2e243m9/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff7L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff7L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff7L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff7L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff7L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff7L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff7L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff7L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff7L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff7L, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e243m9/fenz.c b/src/Specific/montgomery64_2e243m9/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e243m9/fenz.c
+++ b/src/Specific/montgomery64_2e243m9/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e243m9/feopp.c b/src/Specific/montgomery64_2e243m9/feopp.c
index 7fcf38323..0a9f8ea30 100644
--- a/src/Specific/montgomery64_2e243m9/feopp.c
+++ b/src/Specific/montgomery64_2e243m9/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffff7L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x7ffffffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffff7L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x7ffffffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e243m9/fesub.c b/src/Specific/montgomery64_2e243m9/fesub.c
index e02f6adfa..43a5b19bd 100644
--- a/src/Specific/montgomery64_2e243m9/fesub.c
+++ b/src/Specific/montgomery64_2e243m9/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffff7L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x7ffffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xfffffffffffffff7L);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x7ffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e251m9/feadd.c b/src/Specific/montgomery64_2e251m9/feadd.c
index e6013e94a..97273bf73 100644
--- a/src/Specific/montgomery64_2e251m9/feadd.c
+++ b/src/Specific/montgomery64_2e251m9/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff7L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff7L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e251m9/femul.c b/src/Specific/montgomery64_2e251m9/femul.c
index c1a768f4d..2baaf615f 100644
--- a/src/Specific/montgomery64_2e251m9/femul.c
+++ b/src/Specific/montgomery64_2e251m9/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff7L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffffffff, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff7L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffffffff, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff7L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffffffff, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff7L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffffffff, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff7L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffffffff, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff7L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffffffff, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff7L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffffffff, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff7L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffffffff, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff7L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffffffff, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff7L, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffffffff, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e251m9/fenz.c b/src/Specific/montgomery64_2e251m9/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e251m9/fenz.c
+++ b/src/Specific/montgomery64_2e251m9/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e251m9/feopp.c b/src/Specific/montgomery64_2e251m9/feopp.c
index b01942c17..17e6da548 100644
--- a/src/Specific/montgomery64_2e251m9/feopp.c
+++ b/src/Specific/montgomery64_2e251m9/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffff7L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x7ffffffffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffff7L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x7ffffffffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e251m9/fesub.c b/src/Specific/montgomery64_2e251m9/fesub.c
index 450aeb7aa..ae30748ea 100644
--- a/src/Specific/montgomery64_2e251m9/fesub.c
+++ b/src/Specific/montgomery64_2e251m9/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffff7L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x7ffffffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xfffffffffffffff7L);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x7ffffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c b/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c
index d080f0462..0abe6fc52 100644
--- a/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3f80ffffffffffff, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3f80ffffffffffff, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/femul.c b/src/Specific/montgomery64_2e254m127x2e240m1/femul.c
index 2793eb2b1..1ba62c3d5 100644
--- a/src/Specific/montgomery64_2e254m127x2e240m1/femul.c
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/femul.c
@@ -1,136 +1,126 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0x3f80ffffffffffff, &x51);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
-{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
-{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
-{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
-{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0x3f80ffffffffffff, &x129);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
-{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
-{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
-{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
-{ uint8_t x157 = (x156 + x117);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
-{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199);
-{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
-{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0x3f80ffffffffffff, &x208);
-{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
-{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
-{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
-{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
-{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
-{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
-{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
-{ uint8_t x236 = (x235 + x196);
-{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239);
-{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
-{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
-{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
-{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
-{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
-{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
-{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278);
-{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
-{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
-{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0x3f80ffffffffffff, &x287);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
-{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
-{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
-{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
-{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
-{ uint8_t x315 = (x314 + x275);
-{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317);
-{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
-{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
-{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0x3f80ffffffffffff, &x326);
-{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_);
-{ uint64_t x331 = cmovznz(x330, x326, x313);
-{ uint64_t x332 = cmovznz(x330, x323, x310);
-{ uint64_t x333 = cmovznz(x330, x320, x307);
-{ uint64_t x334 = cmovznz(x330, x317, x304);
-out[0] = x331;
-out[1] = x332;
-out[2] = x333;
-out[3] = x334;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // word-by-word multiply-and-reduce on 4x64-bit limbs (index 0 least significant): one round per limb of in1, then one conditional subtraction of the constant {0xffffffffffffffff x3, 0x3f80ffffffffffff}; presumably a Montgomery multiplication (per the montgomery64_* directory name) -- confirm the exact contract against the generator's spec
+ { const uint64_t x8 = in1[3]; // limb consumed by round 4
+ { const uint64_t x9 = in1[2]; // limb consumed by round 3
+ { const uint64_t x7 = in1[1]; // limb consumed by round 2
+ { const uint64_t x5 = in1[0]; // limb consumed by round 1
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: in1[0] times each limb of in2; the return value is the low half, the pointer receives the high half
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // fold high/low halves into the 5-limb row x17,x29,x32,x35,x38
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // adds the pending carry x36 into the top high half; the carry-out is discarded
+ { uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction row: low accumulator limb x17 times each limb of the constant (the three 0xff..ff limbs give three identical products)
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0x3f80ffffffffffff, &x51);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53); // fold into the 5-limb row x41,x53,x56,x59,x62
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+ { uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_); // add the reduction row to the product row; the lowest sum limb is discarded (only its carry x66 is used), so the accumulator drops one limb
+ { uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77); // accumulator after round 1: x68,x71,x74,x77 plus carry x78
+ { uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81); // round 2: in1[1] times each limb of in2
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+ { uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104); // add the new product row to the running accumulator
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120); // reduction row for round 2, keyed on the low accumulator limb x104
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0x3f80ffffffffffff, &x129);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+ { uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+ { uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_); // lowest sum limb discarded again; only its carry x144 is used
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+ { uint8_t x157 = (x156 + x117); // merge the carry-outs of the two additions of this round into the top accumulator limb
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160); // round 3: in1[2] times each limb of in2
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183); // add the new product row to the running accumulator
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+ { uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199); // reduction row for round 3, keyed on the low accumulator limb x183
+ { uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
+ { uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0x3f80ffffffffffff, &x208);
+ { uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+ { uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+ { uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+ { uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_); // lowest sum limb discarded again; only its carry x223 is used
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+ { uint8_t x236 = (x235 + x196); // merge the two carry-outs of round 3
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239); // round 4: in1[3] times each limb of in2
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+ { uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262); // add the new product row to the running accumulator
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+ { uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278); // reduction row for round 4, keyed on the low accumulator limb x262
+ { uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0x3f80ffffffffffff, &x287);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+ { uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+ { uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_); // lowest sum limb discarded again; only its carry x302 is used
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+ { uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+ { uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+ { uint8_t x315 = (x314 + x275); // top (fifth) limb of the final accumulator x304,x307,x310,x313,x315
+ { uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317); // trial subtraction of the constant (= 2^254 - 127*2^240 - 1) from the accumulator
+ { uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
+ { uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+ { uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0x3f80ffffffffffff, &x326);
+ { uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_); // only the final borrow x330 is kept
+ { uint64_t x331 = cmovznz(x330, x326, x313); // per-limb select between the difference and the unsubtracted accumulator, keyed on x330; presumably cmovznz(c, a, b) yields a when c == 0 and b otherwise -- confirm against its definition (not visible here)
+ { uint64_t x332 = cmovznz(x330, x323, x310);
+ { uint64_t x333 = cmovznz(x330, x320, x307);
+ { uint64_t x334 = cmovznz(x330, x317, x304);
+ out[0] = x334; // least-significant limb is written to index 0
+ out[1] = x333;
+ out[2] = x332;
+ out[3] = x331;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c b/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) { // out[0] = bitwise OR of the four limbs of in1, so it is nonzero exactly when some limb is nonzero. NOTE(review): `ReturnType` is not declared anywhere in the visible code; unless it is a macro defined elsewhere this parameter declaration will not compile -- it looks like a leaked generator placeholder, confirm and drop it in the generator
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); // accumulate the OR from the top limb down, with no branches
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9; // any nonzero value means "input is nonzero"; the result is not normalised to 0/1
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c b/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c
index 4114a0200..7ec5b940b 100644
--- a/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x3f80ffffffffffff);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { // out = 0 - in1 on 4x64-bit limbs (index 0 least significant), then adds back the constant {0xffffffffffffffff x3, 0x3f80ffffffffffff} masked by the final borrow
+ { const uint64_t x5 = in1[3]; // most-significant limb (last to enter the borrow chain)
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; // least-significant limb (first to enter the borrow chain)
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // 4-limb subtraction of in1 from zero: difference limbs x8,x11,x14,x17 and final borrow x18
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask keyed on the borrow x18; presumably 0 when x18 == 0 and all-ones otherwise -- confirm against cmovznz's definition (not visible here)
+ { uint64_t x20 = (x19 & 0xffffffffffffffffL); // mask AND limb 0 of the constant (= 2^254 - 127*2^240 - 1)
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant back, limb by limb with carry
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL); // limb 1
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL); // limb 2
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x3f80ffffffffffff); // limb 3 (top limb of the constant)
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the final carry-out is discarded
+ out[0] = x22; // least-significant limb is written to index 0
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c b/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c
index b2a839023..f36de2fd6 100644
--- a/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x3f80ffffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 - in2 on 4x64-bit limbs (index 0 least significant), then adds back the constant {0xffffffffffffffff x3, 0x3f80ffffffffffff} masked by the final borrow
+ { const uint64_t x8 = in1[3]; // most-significant limb of in1 (last to enter the borrow chain)
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // least-significant limb of in1 (first to enter the borrow chain)
+ { const uint64_t x14 = in2[3]; // in2 limbs, same ordering as in1
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // 4-limb subtraction with borrow propagation: difference limbs x17,x20,x23,x26 and final borrow x27
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask keyed on the borrow x27; presumably 0 when x27 == 0 and all-ones otherwise -- confirm against cmovznz's definition (not visible here)
+ { uint64_t x29 = (x28 & 0xffffffffffffffffL); // mask AND limb 0 of the constant (= 2^254 - 127*2^240 - 1)
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant back, limb by limb with carry
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL); // limb 1
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL); // limb 2
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x3f80ffffffffffff); // limb 3 (top limb of the constant)
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the final carry-out is discarded
+ out[0] = x31; // least-significant limb is written to index 0
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m19/feadd.c b/src/Specific/montgomery64_2e255m19/feadd.c
index 82ea51b0d..32341f1d4 100644
--- a/src/Specific/montgomery64_2e255m19/feadd.c
+++ b/src/Specific/montgomery64_2e255m19/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffedL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 + in2 on 4x64-bit limbs (index 0 is the least-significant limb), followed by one conditional subtraction of the constant spelled out in the _subborrow_u64 chain below
+ { const uint64_t x8 = in1[3]; // most-significant limb of in1 (last to enter the carry chain)
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // least-significant limb of in1 (first to enter the carry chain)
+ { const uint64_t x14 = in2[3]; // in2 limbs, same ordering as in1
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // 4-limb add with carry propagation: sum limbs x17,x20,x23,x26 and final carry x27
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffedL, &x29); // trial subtraction of the constant with limbs {0xffffffffffffffed, 0xffffffffffffffff x2, 0x7fffffffffffffff} = 2^255 - 19 from the sum
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // the add's carry-out x27 acts as a fifth limb; only the final borrow x42 is kept, the difference limb is discarded
+ { uint64_t x43 = cmovznz(x42, x38, x26); // per-limb select between the difference and the raw sum, keyed on x42; presumably cmovznz(c, a, b) yields a when c == 0 and b otherwise -- confirm against its definition (not visible here)
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; // least-significant limb is written to index 0
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m19/femul.c b/src/Specific/montgomery64_2e255m19/femul.c
index eac0818f5..fa07ad60a 100644
--- a/src/Specific/montgomery64_2e255m19/femul.c
+++ b/src/Specific/montgomery64_2e255m19/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x86bca1af286bca1bL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffedL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x86bca1af286bca1bL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffedL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x86bca1af286bca1bL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffedL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x86bca1af286bca1bL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffedL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffedL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x86bca1af286bca1bL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffedL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x86bca1af286bca1bL, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffedL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x86bca1af286bca1bL, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffedL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x86bca1af286bca1bL, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffedL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffedL, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m19/fenz.c b/src/Specific/montgomery64_2e255m19/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e255m19/fenz.c
+++ b/src/Specific/montgomery64_2e255m19/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m19/feopp.c b/src/Specific/montgomery64_2e255m19/feopp.c
index d47a8efe4..63a6cccaa 100644
--- a/src/Specific/montgomery64_2e255m19/feopp.c
+++ b/src/Specific/montgomery64_2e255m19/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffedL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x7fffffffffffffffL);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffedL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x7fffffffffffffffL);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m19/fesub.c b/src/Specific/montgomery64_2e255m19/fesub.c
index 0f8d12797..3d5efe708 100644
--- a/src/Specific/montgomery64_2e255m19/fesub.c
+++ b/src/Specific/montgomery64_2e255m19/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffedL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x7fffffffffffffffL);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffffedL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x7fffffffffffffffL);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c
index 82ea51b0d..32341f1d4 100644
--- a/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffedL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffedL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c
index eac0818f5..fa07ad60a 100644
--- a/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x86bca1af286bca1bL, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffedL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x86bca1af286bca1bL, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffedL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x86bca1af286bca1bL, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffedL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x86bca1af286bca1bL, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffedL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffedL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x86bca1af286bca1bL, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffedL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x86bca1af286bca1bL, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffedL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x86bca1af286bca1bL, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffedL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x86bca1af286bca1bL, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffedL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffedL, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c
index d47a8efe4..63a6cccaa 100644
--- a/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffedL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x7fffffffffffffffL);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffffedL);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x7fffffffffffffffL);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c
index 0f8d12797..3d5efe708 100644
--- a/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffedL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x7fffffffffffffffL);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffffedL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x7fffffffffffffffL);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m765/feadd.c b/src/Specific/montgomery64_2e255m765/feadd.c
index 304403933..0d958f563 100644
--- a/src/Specific/montgomery64_2e255m765/feadd.c
+++ b/src/Specific/montgomery64_2e255m765/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffd03L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffd03L, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m765/femul.c b/src/Specific/montgomery64_2e255m765/femul.c
index 232dec347..086ee490e 100644
--- a/src/Specific/montgomery64_2e255m765/femul.c
+++ b/src/Specific/montgomery64_2e255m765/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaa54ffaa54ffaa55L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffd03L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaa54ffaa54ffaa55L, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffd03L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaa54ffaa54ffaa55L, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffd03L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaa54ffaa54ffaa55L, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffd03L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffd03L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* 4-limb Montgomery-style multiply for the modulus 2^255 - 765: four rounds of (partial product, reduction step), then a trial subtraction of the modulus; limbs are least-significant first */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); /* round 1: in1[0] * in2; _mulx_u64 returns the low word and stores the high word through the pointer */
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); /* add each high word to the next low word: product limbs x17, x29, x32, x35, x38 */
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); /* the carry x36 is passed as an operand and folded into the top word; the carry-out is discarded */
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaa54ffaa54ffaa55L, &_); /* x41 = low word of x17 * constant (high word discarded); presumably the constant is -m^-1 mod 2^64 -- TODO confirm against the generator */
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffd03L, &x45); /* x41 * the four modulus limbs */
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); /* add x41 * modulus to the product; the low limb of the sum is discarded and only its carry x69 is kept */
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); /* round 2: in1[1] * in2 */
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); /* accumulate this partial product onto the limbs kept from round 1 (x71, x74, x77, x80, x81) */
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaa54ffaa54ffaa55L, &_); /* reduction step for round 2, same shape as in round 1 */
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffd03L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120); /* the carries out of the accumulation chain and the reduction chain are summed into the top limb for the next round */
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); /* round 3: in1[2] * in2 */
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaa54ffaa54ffaa55L, &_); /* reduction step for round 3 */
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffd03L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); /* round 4: in1[3] * in2 */
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaa54ffaa54ffaa55L, &_); /* reduction step for round 4 */
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffd03L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffd03L, &x329); /* trial subtraction of the modulus from the limbs x316, x319, x322, x325 with top word x327 */
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); /* only the final borrow x342 is kept */
+ { uint64_t x343 = cmovznz(x342, x338, x325); /* cmovznz is defined outside this file; presumably it yields its 2nd argument when x342 == 0 and its 3rd otherwise -- TODO confirm */
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346; /* x346 derives from the lowest limb, so the result is stored least-significant limb first */
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m765/fenz.c b/src/Specific/montgomery64_2e255m765/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e255m765/fenz.c
+++ b/src/Specific/montgomery64_2e255m765/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[4]) { /* out[0] = bitwise OR of the four input limbs, so it is nonzero exactly when at least one limb of in1 is nonzero */
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); /* fold the limbs together from the top down; no branches or data-dependent indexing are used */
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9; /* the caller supplies a one-element output array */
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m765/feopp.c b/src/Specific/montgomery64_2e255m765/feopp.c
index af36ffb7b..8b2b30fb9 100644
--- a/src/Specific/montgomery64_2e255m765/feopp.c
+++ b/src/Specific/montgomery64_2e255m765/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffffffffd03L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0x7fffffffffffffffL);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { /* out = 0 - in1 over four 64-bit limbs, then adds back the modulus 2^255 - 765 masked by x19; limbs are least-significant first */
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); /* 0 - in1 as a 4-step borrow chain: difference limbs x8, x11, x14, x17 */
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); /* x18 is the borrow out of the top limb */
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); /* x19 is a 0 / all-ones mask chosen by cmovznz from x18; cmovznz is defined outside this file, presumably all-ones when x18 != 0 -- TODO confirm */
+ { uint64_t x20 = (x19 & 0xfffffffffffffd03L); /* mask each modulus limb (low limb is 2^64 - 765) and add it back with carry */
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x7fffffffffffffffL);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); /* the carry out of the top limb is discarded */
+ out[0] = x22; /* x22 derives from limb 0, so the result is stored least-significant limb first */
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e255m765/fesub.c b/src/Specific/montgomery64_2e255m765/fesub.c
index 90f0ffa99..431b34e89 100644
--- a/src/Specific/montgomery64_2e255m765/fesub.c
+++ b/src/Specific/montgomery64_2e255m765/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffffffffd03L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0x7fffffffffffffffL);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* out = in1 - in2 over four 64-bit limbs, then adds back the modulus 2^255 - 765 masked by x28; limbs are least-significant first */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); /* in1 - in2 as a 4-step borrow chain: difference limbs x17, x20, x23, x26 */
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); /* x27 is the borrow out of the top limb */
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); /* x28 is a 0 / all-ones mask chosen by cmovznz from x27; cmovznz is defined outside this file, presumably all-ones when x27 != 0 -- TODO confirm */
+ { uint64_t x29 = (x28 & 0xfffffffffffffd03L); /* mask each modulus limb (low limb is 2^64 - 765) and add it back with carry */
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0x7fffffffffffffffL);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); /* the carry out of the top limb is discarded */
+ out[0] = x31; /* x31 derives from limb 0, so the result is stored least-significant limb first */
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m189/feadd.c b/src/Specific/montgomery64_2e256m189/feadd.c
index d35e30e75..e1568049f 100644
--- a/src/Specific/montgomery64_2e256m189/feadd.c
+++ b/src/Specific/montgomery64_2e256m189/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffff43L, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffffffffffffL, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* out = in1 + in2 over four 64-bit limbs, followed by a trial subtraction of the modulus 2^256 - 189; cmovznz picks between the raw sum and the difference */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; /* index 0 feeds the first step of the carry chain, i.e. limbs are least-significant first */
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); /* 4-step add-with-carry chain: sum limbs x17, x20, x23, x26 */
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); /* x27 is the carry out of the top limb */
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffff43L, &x29); /* trial subtraction of the modulus limbs; the low limb is 2^64 - 189, the rest are all-ones */
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffffffffffffL, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); /* extend the borrow chain through the carry x27; only the final borrow x42 is kept */
+ { uint64_t x43 = cmovznz(x42, x38, x26); /* cmovznz is defined outside this file; presumably it yields its 2nd argument when x42 == 0 and its 3rd otherwise -- TODO confirm */
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; /* x46 derives from limb 0, so the result is stored least-significant limb first */
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m189/femul.c b/src/Specific/montgomery64_2e256m189/femul.c
index 214f6b0ef..3744abce3 100644
--- a/src/Specific/montgomery64_2e256m189/femul.c
+++ b/src/Specific/montgomery64_2e256m189/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xa53fa94fea53fa95L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffff43L, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xffffffffffffffffL, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xa53fa94fea53fa95L, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffff43L, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xffffffffffffffffL, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xa53fa94fea53fa95L, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffff43L, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xffffffffffffffffL, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xa53fa94fea53fa95L, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffff43L, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xffffffffffffffffL, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffff43L, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xffffffffffffffffL, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xa53fa94fea53fa95L, &_);
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffff43L, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xffffffffffffffffL, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xa53fa94fea53fa95L, &_);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffff43L, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xffffffffffffffffL, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xa53fa94fea53fa95L, &_);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffff43L, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xffffffffffffffffL, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xa53fa94fea53fa95L, &_);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffff43L, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xffffffffffffffffL, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284);
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffff43L, &x329);
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xffffffffffffffffL, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
+ { uint64_t x343 = cmovznz(x342, x338, x325);
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346;
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m189/fenz.c b/src/Specific/montgomery64_2e256m189/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e256m189/fenz.c
+++ b/src/Specific/montgomery64_2e256m189/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5);
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9;
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m189/feopp.c b/src/Specific/montgomery64_2e256m189/feopp.c
index 270aa6700..fc90bc87d 100644
--- a/src/Specific/montgomery64_2e256m189/feopp.c
+++ b/src/Specific/montgomery64_2e256m189/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffff43L);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffff43L);
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m189/fesub.c b/src/Specific/montgomery64_2e256m189/fesub.c
index 251a20c47..4ba6a7b5a 100644
--- a/src/Specific/montgomery64_2e256m189/fesub.c
+++ b/src/Specific/montgomery64_2e256m189/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffff43L);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
+ { uint64_t x29 = (x28 & 0xffffffffffffff43L);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
+ out[0] = x31;
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c
index 629cca0a0..12306c312 100644
--- a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffff, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0x0, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffff00000001L, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffff, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0x0, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffff00000001L, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
+ { uint64_t x43 = cmovznz(x42, x38, x26);
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46;
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c
index 7e33d1870..acea3b182 100644
--- a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c
@@ -1,132 +1,122 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffff, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffff00000001L, &x48);
-{ uint64_t x50; uint8_t x51 = _addcarryx_u64(0x0, x42, x44, &x50);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x51, x45, 0x0, &x53);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, 0x0, x47, &x56);
-{ uint64_t x59; uint8_t _ = _addcarryx_u64(0x0, x57, x48, &x59);
-{ uint64_t _; uint8_t x63 = _addcarryx_u64(0x0, x17, x41, &_);
-{ uint64_t x65; uint8_t x66 = _addcarryx_u64(x63, x29, x50, &x65);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x32, x53, &x68);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x35, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x38, x59, &x74);
-{ uint64_t x78; uint64_t x77 = _mulx_u64(x7, x11, &x78);
-{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x13, &x81);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x15, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x14, &x87);
-{ uint64_t x89; uint8_t x90 = _addcarryx_u64(0x0, x78, x80, &x89);
-{ uint64_t x92; uint8_t x93 = _addcarryx_u64(x90, x81, x83, &x92);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
-{ uint64_t x98; uint8_t _ = _addcarryx_u64(0x0, x96, x87, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(0x0, x65, x77, &x101);
-{ uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x68, x89, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x75, x98, &x113);
-{ uint64_t x117; uint64_t x116 = _mulx_u64(x101, 0xffffffffffffffffL, &x117);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x101, 0xffffffff, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x101, 0xffffffff00000001L, &x123);
-{ uint64_t x125; uint8_t x126 = _addcarryx_u64(0x0, x117, x119, &x125);
-{ uint64_t x128; uint8_t x129 = _addcarryx_u64(x126, x120, 0x0, &x128);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(x129, 0x0, x122, &x131);
-{ uint64_t x134; uint8_t _ = _addcarryx_u64(0x0, x132, x123, &x134);
-{ uint64_t _; uint8_t x138 = _addcarryx_u64(0x0, x101, x116, &_);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x104, x125, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x107, x128, &x143);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x110, x131, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x113, x134, &x149);
-{ uint8_t x151 = (x150 + x114);
-{ uint64_t x154; uint64_t x153 = _mulx_u64(x9, x11, &x154);
-{ uint64_t x157; uint64_t x156 = _mulx_u64(x9, x13, &x157);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x15, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x14, &x163);
-{ uint64_t x165; uint8_t x166 = _addcarryx_u64(0x0, x154, x156, &x165);
-{ uint64_t x168; uint8_t x169 = _addcarryx_u64(x166, x157, x159, &x168);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x160, x162, &x171);
-{ uint64_t x174; uint8_t _ = _addcarryx_u64(0x0, x172, x163, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x140, x153, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x143, x165, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x146, x168, &x183);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x151, x174, &x189);
-{ uint64_t x193; uint64_t x192 = _mulx_u64(x177, 0xffffffffffffffffL, &x193);
-{ uint64_t x196; uint64_t x195 = _mulx_u64(x177, 0xffffffff, &x196);
-{ uint64_t x199; uint64_t x198 = _mulx_u64(x177, 0xffffffff00000001L, &x199);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(0x0, x193, x195, &x201);
-{ uint64_t x204; uint8_t x205 = _addcarryx_u64(x202, x196, 0x0, &x204);
-{ uint64_t x207; uint8_t x208 = _addcarryx_u64(x205, 0x0, x198, &x207);
-{ uint64_t x210; uint8_t _ = _addcarryx_u64(0x0, x208, x199, &x210);
-{ uint64_t _; uint8_t x214 = _addcarryx_u64(0x0, x177, x192, &_);
-{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x180, x201, &x216);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x183, x204, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x186, x207, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x189, x210, &x225);
-{ uint8_t x227 = (x226 + x190);
-{ uint64_t x230; uint64_t x229 = _mulx_u64(x8, x11, &x230);
-{ uint64_t x233; uint64_t x232 = _mulx_u64(x8, x13, &x233);
-{ uint64_t x236; uint64_t x235 = _mulx_u64(x8, x15, &x236);
-{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x14, &x239);
-{ uint64_t x241; uint8_t x242 = _addcarryx_u64(0x0, x230, x232, &x241);
-{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x233, x235, &x244);
-{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x236, x238, &x247);
-{ uint64_t x250; uint8_t _ = _addcarryx_u64(0x0, x248, x239, &x250);
-{ uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x216, x229, &x253);
-{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x219, x241, &x256);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x222, x244, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x225, x247, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x227, x250, &x265);
-{ uint64_t x269; uint64_t x268 = _mulx_u64(x253, 0xffffffffffffffffL, &x269);
-{ uint64_t x272; uint64_t x271 = _mulx_u64(x253, 0xffffffff, &x272);
-{ uint64_t x275; uint64_t x274 = _mulx_u64(x253, 0xffffffff00000001L, &x275);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(0x0, x269, x271, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x272, 0x0, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, 0x0, x274, &x283);
-{ uint64_t x286; uint8_t _ = _addcarryx_u64(0x0, x284, x275, &x286);
-{ uint64_t _; uint8_t x290 = _addcarryx_u64(0x0, x253, x268, &_);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x256, x277, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x259, x280, &x295);
-{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x262, x283, &x298);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x265, x286, &x301);
-{ uint8_t x303 = (x302 + x266);
-{ uint64_t x305; uint8_t x306 = _subborrow_u64(0x0, x292, 0xffffffffffffffffL, &x305);
-{ uint64_t x308; uint8_t x309 = _subborrow_u64(x306, x295, 0xffffffff, &x308);
-{ uint64_t x311; uint8_t x312 = _subborrow_u64(x309, x298, 0x0, &x311);
-{ uint64_t x314; uint8_t x315 = _subborrow_u64(x312, x301, 0xffffffff00000001L, &x314);
-{ uint64_t _; uint8_t x318 = _subborrow_u64(x315, x303, 0x0, &_);
-{ uint64_t x319 = cmovznz(x318, x314, x301);
-{ uint64_t x320 = cmovznz(x318, x311, x298);
-{ uint64_t x321 = cmovznz(x318, x308, x295);
-{ uint64_t x322 = cmovznz(x318, x305, x292);
-out[0] = x319;
-out[1] = x320;
-out[2] = x321;
-out[3] = x322;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // 4x64-bit limb multiply of in1 by in2 interleaved with word-wise reduction by the constant p (limbs ffffffffffffffff, ffffffff, 0, ffffffff00000001); NOTE(review): the shape matches word-by-word Montgomery multiplication and the directory is named montgomery64 -- confirm against the generator
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // in1 limbs are consumed in the order x5, x7, x9, x8 (index 0 first)
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: partial products x5 * in2[0..3]; _mulx_u64 returns the low word and stores the high word through the pointer
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the products into one 5-word value: high word of product i plus low word of product i+1
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word = last carry + last high word; `_` is a throwaway name, redeclared (shadowed) in later nested scopes with varying types
+ { uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction 1: the lowest word x17 is multiplied by each nonzero limb of p
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffff, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffff00000001L, &x48);
+ { uint64_t x50; uint8_t x51 = _addcarryx_u64(0x0, x42, x44, &x50);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x51, x45, 0x0, &x53); // the literal 0x0 terms here and on the next line stand in for the product with p's zero limb
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, 0x0, x47, &x56);
+ { uint64_t x59; uint8_t _ = _addcarryx_u64(0x0, x57, x48, &x59);
+ { uint64_t _; uint8_t x63 = _addcarryx_u64(0x0, x17, x41, &_); // x41 = low64(x17 * (2^64 - 1)) = -x17 mod 2^64, so this sum word is always 0 and is dropped; only the carry x63 is needed
+ { uint64_t x65; uint8_t x66 = _addcarryx_u64(x63, x29, x50, &x65); // the remaining words x65, x68, x71, x74 (carry x75) become the accumulator, one word lower than before
+ { uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x32, x53, &x68);
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x35, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x38, x59, &x74);
+ { uint64_t x78; uint64_t x77 = _mulx_u64(x7, x11, &x78); // round 2: partial products x7 * in2[0..3]
+ { uint64_t x81; uint64_t x80 = _mulx_u64(x7, x13, &x81);
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x15, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x14, &x87);
+ { uint64_t x89; uint8_t x90 = _addcarryx_u64(0x0, x78, x80, &x89);
+ { uint64_t x92; uint8_t x93 = _addcarryx_u64(x90, x81, x83, &x92);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+ { uint64_t x98; uint8_t _ = _addcarryx_u64(0x0, x96, x87, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(0x0, x65, x77, &x101); // add the round-2 product into the accumulator; the previous top carry x75 is the accumulator's fifth word
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x68, x89, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x75, x98, &x113);
+ { uint64_t x117; uint64_t x116 = _mulx_u64(x101, 0xffffffffffffffffL, &x117); // reduction 2: same pattern as reduction 1, driven by the new lowest word x101
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x101, 0xffffffff, &x120);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x101, 0xffffffff00000001L, &x123);
+ { uint64_t x125; uint8_t x126 = _addcarryx_u64(0x0, x117, x119, &x125);
+ { uint64_t x128; uint8_t x129 = _addcarryx_u64(x126, x120, 0x0, &x128);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(x129, 0x0, x122, &x131);
+ { uint64_t x134; uint8_t _ = _addcarryx_u64(0x0, x132, x123, &x134);
+ { uint64_t _; uint8_t x138 = _addcarryx_u64(0x0, x101, x116, &_);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x104, x125, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x107, x128, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x110, x131, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x113, x134, &x149);
+ { uint8_t x151 = (x150 + x114); // merge the carry-outs of the product-add chain and the reduction-add chain into the accumulator's top word
+ { uint64_t x154; uint64_t x153 = _mulx_u64(x9, x11, &x154); // round 3: partial products x9 * in2[0..3]
+ { uint64_t x157; uint64_t x156 = _mulx_u64(x9, x13, &x157);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x9, x15, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x9, x14, &x163);
+ { uint64_t x165; uint8_t x166 = _addcarryx_u64(0x0, x154, x156, &x165);
+ { uint64_t x168; uint8_t x169 = _addcarryx_u64(x166, x157, x159, &x168);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x160, x162, &x171);
+ { uint64_t x174; uint8_t _ = _addcarryx_u64(0x0, x172, x163, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x140, x153, &x177);
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x143, x165, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x146, x168, &x183);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x151, x174, &x189);
+ { uint64_t x193; uint64_t x192 = _mulx_u64(x177, 0xffffffffffffffffL, &x193); // reduction 3, driven by x177
+ { uint64_t x196; uint64_t x195 = _mulx_u64(x177, 0xffffffff, &x196);
+ { uint64_t x199; uint64_t x198 = _mulx_u64(x177, 0xffffffff00000001L, &x199);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(0x0, x193, x195, &x201);
+ { uint64_t x204; uint8_t x205 = _addcarryx_u64(x202, x196, 0x0, &x204);
+ { uint64_t x207; uint8_t x208 = _addcarryx_u64(x205, 0x0, x198, &x207);
+ { uint64_t x210; uint8_t _ = _addcarryx_u64(0x0, x208, x199, &x210);
+ { uint64_t _; uint8_t x214 = _addcarryx_u64(0x0, x177, x192, &_);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x180, x201, &x216);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x183, x204, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x186, x207, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x189, x210, &x225);
+ { uint8_t x227 = (x226 + x190);
+ { uint64_t x230; uint64_t x229 = _mulx_u64(x8, x11, &x230); // round 4: partial products x8 * in2[0..3]
+ { uint64_t x233; uint64_t x232 = _mulx_u64(x8, x13, &x233);
+ { uint64_t x236; uint64_t x235 = _mulx_u64(x8, x15, &x236);
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x8, x14, &x239);
+ { uint64_t x241; uint8_t x242 = _addcarryx_u64(0x0, x230, x232, &x241);
+ { uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x233, x235, &x244);
+ { uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x236, x238, &x247);
+ { uint64_t x250; uint8_t _ = _addcarryx_u64(0x0, x248, x239, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x216, x229, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x219, x241, &x256);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x222, x244, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x225, x247, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x227, x250, &x265);
+ { uint64_t x269; uint64_t x268 = _mulx_u64(x253, 0xffffffffffffffffL, &x269); // reduction 4, driven by x253
+ { uint64_t x272; uint64_t x271 = _mulx_u64(x253, 0xffffffff, &x272);
+ { uint64_t x275; uint64_t x274 = _mulx_u64(x253, 0xffffffff00000001L, &x275);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(0x0, x269, x271, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x272, 0x0, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, 0x0, x274, &x283);
+ { uint64_t x286; uint8_t _ = _addcarryx_u64(0x0, x284, x275, &x286);
+ { uint64_t _; uint8_t x290 = _addcarryx_u64(0x0, x253, x268, &_);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x256, x277, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x259, x280, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x262, x283, &x298);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x265, x286, &x301);
+ { uint8_t x303 = (x302 + x266); // overflow word above the 4-limb result x292, x295, x298, x301
+ { uint64_t x305; uint8_t x306 = _subborrow_u64(0x0, x292, 0xffffffffffffffffL, &x305); // final step: compute result - p limb by limb with a borrow chain
+ { uint64_t x308; uint8_t x309 = _subborrow_u64(x306, x295, 0xffffffff, &x308);
+ { uint64_t x311; uint8_t x312 = _subborrow_u64(x309, x298, 0x0, &x311);
+ { uint64_t x314; uint8_t x315 = _subborrow_u64(x312, x301, 0xffffffff00000001L, &x314);
+ { uint64_t _; uint8_t x318 = _subborrow_u64(x315, x303, 0x0, &_); // run the borrow through the overflow word; only the final borrow x318 is kept
+ { uint64_t x319 = cmovznz(x318, x314, x301); // cmovznz is defined outside this view; presumably yields the 2nd argument when x318 == 0 and the 3rd otherwise, i.e. keeps result - p only when no borrow occurred -- TODO confirm
+ { uint64_t x320 = cmovznz(x318, x311, x298);
+ { uint64_t x321 = cmovznz(x318, x308, x295);
+ { uint64_t x322 = cmovznz(x318, x305, x292);
+ out[0] = x322; // stored least significant limb first
+ out[1] = x321;
+ out[2] = x320;
+ out[3] = x319;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[4]) { // writes the bitwise OR of the four input limbs to out[0], so out[0] != 0 exactly when some limb is nonzero. NOTE(review): `ReturnType` is not declared anywhere in this view; unless it is a macro defined elsewhere this parameter list will not compile -- it looks like a leftover from the generator's display output, confirm
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); // fold the limbs together from index 3 down to index 0
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9; // not normalized to 0/1: the value is the OR of all limbs
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c
index 90f64739b..6a7ade989 100644
--- a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c
@@ -1,37 +1,23 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffff);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(x27, x14, 0x0, &x29);
-{ uint64_t x31 = (x19 & 0xffffffff00000001L);
-{ uint64_t x33; uint8_t _ = _addcarryx_u64(x30, x17, x31, &x33);
-out[0] = x33;
-out[1] = x29;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { // out = 0 - in1 on 4x64-bit limbs, then adds back the constant p (limbs ffffffffffffffff, ffffffff, 0, ffffffff00000001) under a mask derived from the final borrow
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; // index 0 enters the borrow chain first, so it is the least significant limb
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // 0 - limb with borrow-in 0; the return value is the borrow-out
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = final borrow of 0 - in1
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask word; cmovznz is defined outside this view, presumably 0 when x18 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x20 = (x19 & 0xffffffffffffffffL); // masked limb 0 of p
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant back, limb by limb, with a carry chain
+ { uint64_t x24 = (x19 & 0xffffffff); // masked limb 1 of p
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(x27, x14, 0x0, &x29); // limb 2 of p is 0, so only the carry propagates here
+ { uint64_t x31 = (x19 & 0xffffffff00000001L); // masked limb 3 of p
+ { uint64_t x33; uint8_t _ = _addcarryx_u64(x30, x17, x31, &x33); // the carry out of the top limb is discarded into `_`
+ out[0] = x22; // stored least significant limb first
+ out[1] = x26;
+ out[2] = x29;
+ out[3] = x33;
+ }}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c
index 8f286af64..f78c10471 100644
--- a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c
@@ -1,37 +1,27 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffff);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x23, 0x0, &x38);
-{ uint64_t x40 = (x28 & 0xffffffff00000001L);
-{ uint64_t x42; uint8_t _ = _addcarryx_u64(x39, x26, x40, &x42);
-out[0] = x42;
-out[1] = x38;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 - in2 on 4x64-bit limbs, then adds back the constant p (limbs ffffffffffffffff, ffffffff, 0, ffffffff00000001) under a mask derived from the final borrow
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // index 0 enters the borrow chain first, so it is the least significant limb
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // limb 0 difference with borrow-in 0; the return value is the borrow-out
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = final borrow of in1 - in2
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask word; cmovznz is defined outside this view, presumably 0 when x27 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x29 = (x28 & 0xffffffffffffffffL); // masked limb 0 of p
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant back, limb by limb, with a carry chain
+ { uint64_t x33 = (x28 & 0xffffffff); // masked limb 1 of p
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x23, 0x0, &x38); // limb 2 of p is 0, so only the carry propagates here
+ { uint64_t x40 = (x28 & 0xffffffff00000001L); // masked limb 3 of p
+ { uint64_t x42; uint8_t _ = _addcarryx_u64(x39, x26, x40, &x42); // the carry out of the top limb is discarded into `_`
+ out[0] = x31; // stored least significant limb first
+ out[1] = x35;
+ out[2] = x38;
+ out[3] = x42;
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e32m977/feadd.c b/src/Specific/montgomery64_2e256m2e32m977/feadd.c
index c16909cf3..d4568cb8e 100644
--- a/src/Specific/montgomery64_2e256m2e32m977/feadd.c
+++ b/src/Specific/montgomery64_2e256m2e32m977/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffefffffc2fL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffffffffffffL, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 + in2 on 4x64-bit limbs, followed by one conditional subtraction of the constant p whose limbs appear below
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // index 0 enters the carry chain first, so it is the least significant limb
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // limb 0 sum with carry-in 0; the return value is the carry-out
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the top limb (bit 256 of the raw sum)
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffefffffc2fL, &x29); // start of sum - p; p's limbs, low to high, are fffffffefffffc2f, then ffffffffffffffff three times, i.e. 2^256 - 2^32 - 977
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffffffffffffL, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // run the borrow through the add carry x27; the difference word is thrown away, only the final borrow x42 is kept
+ { uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is defined outside this view; presumably yields the 2nd argument when x42 == 0 and the 3rd otherwise, i.e. keeps sum - p only when no borrow occurred -- TODO confirm
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; // results are stored least significant limb first, matching the input layout
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e32m977/femul.c b/src/Specific/montgomery64_2e256m2e32m977/femul.c
index 3905eddb0..eda5fd297 100644
--- a/src/Specific/montgomery64_2e256m2e32m977/femul.c
+++ b/src/Specific/montgomery64_2e256m2e32m977/femul.c
@@ -1,140 +1,130 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xd838091dd2253531L, &_);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffefffffc2fL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
-{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xffffffffffffffffL, &x54);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
-{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
-{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
-{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
-{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
-{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xd838091dd2253531L, &_);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffefffffc2fL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
-{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
-{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xffffffffffffffffL, &x135);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
-{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
-{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
-{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
-{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
-{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
-{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
-{ uint8_t x163 = (x162 + x120);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
-{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
-{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
-{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
-{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
-{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
-{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xd838091dd2253531L, &_);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffefffffc2fL, &x208);
-{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
-{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
-{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xffffffffffffffffL, &x217);
-{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
-{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
-{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
-{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
-{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
-{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
-{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
-{ uint8_t x245 = (x244 + x202);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248);
-{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
-{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
-{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
-{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
-{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
-{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xd838091dd2253531L, &_);
-{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffefffffc2fL, &x290);
-{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
-{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
-{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xffffffffffffffffL, &x299);
-{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
-{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
-{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
-{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
-{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
-{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
-{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
-{ uint8_t x327 = (x326 + x284);
-{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffefffffc2fL, &x329);
-{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
-{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
-{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xffffffffffffffffL, &x338);
-{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_);
-{ uint64_t x343 = cmovznz(x342, x338, x325);
-{ uint64_t x344 = cmovznz(x342, x335, x322);
-{ uint64_t x345 = cmovznz(x342, x332, x319);
-{ uint64_t x346 = cmovznz(x342, x329, x316);
-out[0] = x343;
-out[1] = x344;
-out[2] = x345;
-out[3] = x346;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // word-by-word multiply-and-reduce of two 4-limb values (the file lives under a montgomery64_* directory): one round per limb of in1, then one conditional subtraction of the constant; limbs are 64-bit, index 0 least significant
+ { const uint64_t x8 = in1[3]; // most-significant limb of in1 (used in round 4)
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // least-significant limb of in1 (used in round 1)
+ { const uint64_t x14 = in2[3]; // most-significant limb of in2
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; // least-significant limb of in2
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: 64x64->128 partial products of x5 with each limb of in2 (_mulx_u64 returns the low word and stores the high word)
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // add each high word to the next low word: x5 * in2 = (x38:x35:x32:x29:x17)
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // pending carry x36 is passed as an addend into the top word; the carry-out is discarded
+ { uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xd838091dd2253531L, &_); // x41 = low 64 bits of x17 * constant (high word discarded); the constant is presumably -m^-1 mod 2^64 for the modulus m below -- TODO confirm
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffefffffc2fL, &x45); // x41 times the four constant limbs (low to high: 0xfffffffefffffc2f, ~0, ~0, ~0, i.e. 2^256 - 2^32 - 977)
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+ { uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xffffffffffffffffL, &x54);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // assemble that product as (x65:x62:x59:x56:x44)
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+ { uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // add the two 5-word values; the lowest sum word is thrown away and only its carry x69 is kept, which drops one word from the running value
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // running value after round 1: (x81:x80:x77:x74:x71)
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: partial products of x7 (in1[1]) with each limb of in2
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+ { uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95); // x7 * in2 = (x104:x101:x98:x95:x83)
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+ { uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the new product to the running value
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+ { uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119); // previous top carry x81 enters as an addend; x120 is the new carry-out
+ { uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xd838091dd2253531L, &_); // reduction factor for round 2, derived from the lowest running word x107
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffefffffc2fL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+ { uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xffffffffffffffffL, &x135);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137); // x122 * constant = (x146:x143:x140:x137:x125)
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+ { uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+ { uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest sum word discarded again, carry x150 kept
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+ { uint8_t x163 = (x162 + x120); // merge the two carry-outs of this round into the top word: running value is (x163:x161:x158:x155:x152)
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: partial products of x9 (in1[2]) with each limb of in2
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+ { uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+ { uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177); // x9 * in2 = (x186:x183:x180:x177:x165)
+ { uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+ { uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189); // add the new product to the running value
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+ { uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+ { uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xd838091dd2253531L, &_); // reduction factor for round 3, derived from x189
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffefffffc2fL, &x208);
+ { uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+ { uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+ { uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xffffffffffffffffL, &x217);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219); // x204 * constant = (x228:x225:x222:x219:x207)
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+ { uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+ { uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // lowest sum word discarded, carry x232 kept
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+ { uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+ { uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+ { uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+ { uint8_t x245 = (x244 + x202); // merged carry-outs: running value is (x245:x243:x240:x237:x234)
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: partial products of x8 (in1[3]) with each limb of in2
+ { uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259); // x8 * in2 = (x268:x265:x262:x259:x247)
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+ { uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271); // add the new product to the running value
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+ { uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xd838091dd2253531L, &_); // reduction factor for round 4, derived from x271
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffefffffc2fL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xffffffffffffffffL, &x299);
+ { uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301); // x286 * constant = (x310:x307:x304:x301:x289)
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+ { uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+ { uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // lowest sum word discarded, carry x314 kept
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+ { uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+ { uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+ { uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+ { uint8_t x327 = (x326 + x284); // merged carry-outs: value before the final correction is (x327:x325:x322:x319:x316)
+ { uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffefffffc2fL, &x329); // trial subtraction of the constant; assumes Intel operand order (second argument is the minuend) -- TODO confirm for the compiler in use
+ { uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+ { uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+ { uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xffffffffffffffffL, &x338);
+ { uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // borrow is run through the top word x327; only the final borrow x342 is kept
+ { uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is defined outside this diff; presumably returns its 2nd argument when x342 == 0 and its 3rd otherwise, i.e. keeps the difference unless the subtraction borrowed -- TODO confirm
+ { uint64_t x344 = cmovznz(x342, x335, x322);
+ { uint64_t x345 = cmovznz(x342, x332, x319);
+ { uint64_t x346 = cmovznz(x342, x329, x316);
+ out[0] = x346; // x346 is selected from the lowest-limb values (x329 / x316), so out[0] is the least-significant limb
+ out[1] = x345;
+ out[2] = x344;
+ out[3] = x343;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e32m977/fenz.c b/src/Specific/montgomery64_2e256m2e32m977/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e256m2e32m977/fenz.c
+++ b/src/Specific/montgomery64_2e256m2e32m977/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[4]) { // out[0] = bitwise OR of the four 64-bit limbs of in1, so it is zero exactly when every limb of in1 is zero
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); // fold the limbs together one at a time; no branches, no data-dependent indexing
+ { uint64_t x8 = (x4 | x7);
+ { uint64_t x9 = (x2 | x8);
+ out[0] = x9; // nonzero iff at least one bit of in1 is set
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e32m977/feopp.c b/src/Specific/montgomery64_2e256m2e32m977/feopp.c
index 042716ff8..cea8498a4 100644
--- a/src/Specific/montgomery64_2e256m2e32m977/feopp.c
+++ b/src/Specific/montgomery64_2e256m2e32m977/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xfffffffefffffc2fL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { // out = 0 - in1 over four 64-bit limbs (index 0 least significant), with a fixed 4-limb constant added back under a mask derived from the final borrow
+ { const uint64_t x5 = in1[3]; // most-significant limb
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; // least-significant limb
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain computing 0 - in1, lowest limb first; assumes Intel operand order (second argument is the minuend) -- TODO confirm for the compiler in use
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the final borrow
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask; cmovznz is defined outside this diff, presumably giving 0 when x18 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x20 = (x19 & 0xfffffffefffffc2fL); // masked constant limbs (low to high: 0xfffffffefffffc2f, ~0, ~0, ~0, i.e. 2^256 - 2^32 - 977)
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant to the difference, lowest limb first
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0xffffffffffffffffL);
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top limb is discarded
+ out[0] = x22; // x22 comes from the lowest-limb addition, so out[0] is the least-significant limb
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m2e32m977/fesub.c b/src/Specific/montgomery64_2e256m2e32m977/fesub.c
index d1a9a043e..ee5e2b88a 100644
--- a/src/Specific/montgomery64_2e256m2e32m977/fesub.c
+++ b/src/Specific/montgomery64_2e256m2e32m977/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xfffffffefffffc2fL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 - in2 over four 64-bit limbs (index 0 least significant), with a fixed 4-limb constant added back under a mask derived from the final borrow
+ { const uint64_t x8 = in1[3]; // most-significant limb of in1
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // least-significant limb of in1
+ { const uint64_t x14 = in2[3]; // most-significant limb of in2
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; // least-significant limb of in2
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain computing in1 - in2, lowest limb first; assumes Intel operand order (second argument is the minuend) -- TODO confirm for the compiler in use
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the final borrow
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask; cmovznz is defined outside this diff, presumably giving 0 when x27 == 0 and all-ones otherwise -- TODO confirm
+ { uint64_t x29 = (x28 & 0xfffffffefffffc2fL); // masked constant limbs (low to high: 0xfffffffefffffc2f, ~0, ~0, ~0, i.e. 2^256 - 2^32 - 977)
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant to the difference, lowest limb first
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0xffffffffffffffffL);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top limb is discarded
+ out[0] = x31; // x31 comes from the lowest-limb addition, so out[0] is the least-significant limb
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c b/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c
index 59e978a69..b14154459 100644
--- a/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffa7ffffffffffffL, &x38);
-{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_);
-{ uint64_t x43 = cmovznz(x42, x38, x26);
-{ uint64_t x44 = cmovznz(x42, x35, x23);
-{ uint64_t x45 = cmovznz(x42, x32, x20);
-{ uint64_t x46 = cmovznz(x42, x29, x17);
-out[0] = x43;
-out[1] = x44;
-out[2] = x45;
-out[3] = x46;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feadd(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { // out = in1 + in2 followed by one conditional subtraction of a fixed 4-limb constant; limbs are 64-bit, index 0 least significant
+ { const uint64_t x8 = in1[3]; // most-significant limb of in1
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // least-significant limb of in1
+ { const uint64_t x14 = in2[3]; // most-significant limb of in2
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; // least-significant limb of in2
+ { uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // add with carry, lowest limb first: (x27:x26:x23:x20:x17) = in1 + in2
+ { uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top limb
+ { uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the constant with limbs (low to high) ~0, ~0, ~0, 0xffa7ffffffffffff, i.e. 2^256 - 88*2^240 - 1; assumes Intel operand order (second argument is the minuend) -- TODO confirm for the compiler in use
+ { uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffa7ffffffffffffL, &x38);
+ { uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // run the borrow through the carry-out word; the difference word is discarded, only the final borrow x42 is kept
+ { uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is defined outside this diff; presumably returns its 2nd argument when x42 == 0 and its 3rd otherwise, i.e. keeps the difference unless the subtraction borrowed -- TODO confirm
+ { uint64_t x44 = cmovznz(x42, x35, x23);
+ { uint64_t x45 = cmovznz(x42, x32, x20);
+ { uint64_t x46 = cmovznz(x42, x29, x17);
+ out[0] = x46; // x46 is selected from the lowest-limb values (x29 / x17), so out[0] is the least-significant limb
+ out[1] = x45;
+ out[2] = x44;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/femul.c b/src/Specific/montgomery64_2e256m88x2e240m1/femul.c
index 2382c2c35..20f63131b 100644
--- a/src/Specific/montgomery64_2e256m88x2e240m1/femul.c
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/femul.c
@@ -1,136 +1,126 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18);
-{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
-{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
-{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
-{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
-{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
-{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
-{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0xffa7ffffffffffffL, &x51);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
-{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
-{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
-{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
-{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
-{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
-{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
-{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81);
-{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
-{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
-{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
-{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
-{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
-{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
-{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
-{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104);
-{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
-{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
-{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
-{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
-{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120);
-{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
-{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
-{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0xffa7ffffffffffffL, &x129);
-{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
-{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
-{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
-{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
-{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_);
-{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
-{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
-{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
-{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
-{ uint8_t x157 = (x156 + x117);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
-{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
-{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
-{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199);
-{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
-{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
-{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0xffa7ffffffffffffL, &x208);
-{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
-{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
-{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
-{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
-{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
-{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
-{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
-{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
-{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
-{ uint8_t x236 = (x235 + x196);
-{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239);
-{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
-{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
-{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
-{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
-{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
-{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
-{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
-{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
-{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
-{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278);
-{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
-{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
-{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0xffa7ffffffffffffL, &x287);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
-{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
-{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
-{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
-{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
-{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
-{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
-{ uint8_t x315 = (x314 + x275);
-{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317);
-{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
-{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
-{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0xffa7ffffffffffffL, &x326);
-{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_);
-{ uint64_t x331 = cmovznz(x330, x326, x313);
-{ uint64_t x332 = cmovznz(x330, x323, x310);
-{ uint64_t x333 = cmovznz(x330, x320, x307);
-{ uint64_t x334 = cmovznz(x330, x317, x304);
-out[0] = x331;
-out[1] = x332;
-out[2] = x333;
-out[3] = x334;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* Four-limb multiply of in1 by in2 interleaved with a word-by-word reduction against the constant limbs ff..ff, ff..ff, ff..ff, 0xffa7ffffffffffff (= 2^256 - 88*2^240 - 1); the directory name suggests this is Montgomery multiplication - NOTE(review): confirm the exact contract with the generator. Limb 0 is least significant. */
+ { const uint64_t x8 = in1[3]; /* top limb of in1 */
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; /* bottom limb of in1 */
+ { const uint64_t x14 = in2[3]; /* top limb of in2 */
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; /* bottom limb of in2 */
+ { uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); /* round 0: in1[0] times every limb of in2; return value is the low word, the pointer receives the high word */
+ { uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+ { uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+ { uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); /* stitch high/low halves into the five-word product x17,x29,x32,x35,x38 */
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+ { uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); /* carry x36 is passed as an operand here; the carry-out lands in `_` and is never read */
+ { uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); /* low product word x17 times each constant limb; the three lowest constant limbs are equal, so x41/x44/x47 repeat the same product */
+ { uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
+ { uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+ { uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0xffa7ffffffffffffL, &x51);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+ { uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+ { uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+ { uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_); /* lowest sum word is discarded; only its carry x66 continues */
+ { uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68); /* running value after round 0: x68,x71,x74,x77 plus carry x78 */
+ { uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+ { uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+ { uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
+ { uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81); /* round 1: in1[1] times every limb of in2 */
+ { uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+ { uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+ { uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+ { uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+ { uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+ { uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104); /* add this round's product to the running value */
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+ { uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116); /* previous round's carry x78 enters as the top word */
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120); /* low word x104 times each constant limb */
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0xffa7ffffffffffffL, &x129);
+ { uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+ { uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+ { uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_); /* lowest sum word discarded again */
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+ { uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+ { uint8_t x157 = (x156 + x117); /* merge the two top carries of this round into one word */
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160); /* round 2: in1[2] times every limb of in2 */
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+ { uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+ { uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+ { uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199); /* low word x183 times each constant limb */
+ { uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
+ { uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+ { uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0xffa7ffffffffffffL, &x208);
+ { uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+ { uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+ { uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+ { uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+ { uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+ { uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+ { uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+ { uint8_t x236 = (x235 + x196);
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239); /* round 3: in1[3] times every limb of in2 */
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+ { uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+ { uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+ { uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+ { uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+ { uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278); /* low word x262 times each constant limb */
+ { uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
+ { uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0xffa7ffffffffffffL, &x287);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+ { uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+ { uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+ { uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+ { uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+ { uint8_t x315 = (x314 + x275); /* overflow word above the four result limbs */
+ { uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317); /* trial subtraction of the constant from x304,x307,x310,x313 */
+ { uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
+ { uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+ { uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0xffa7ffffffffffffL, &x326);
+ { uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_); /* include the overflow word; only the final borrow x330 is used */
+ { uint64_t x331 = cmovznz(x330, x326, x313); /* cmovznz is defined elsewhere; presumably yields the 2nd argument when x330 == 0 and the 3rd otherwise - confirm */
+ { uint64_t x332 = cmovznz(x330, x323, x310);
+ { uint64_t x333 = cmovznz(x330, x320, x307);
+ { uint64_t x334 = cmovznz(x330, x317, x304);
+ out[0] = x334; /* result is written bottom limb first */
+ out[1] = x333;
+ out[2] = x332;
+ out[3] = x331;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c b/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c
index 51bde0513..7541094ff 100644
--- a/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c
@@ -1,25 +1,11 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x7 = (x6 | x5);
-{ uint64_t x8 = (x4 | x7);
-{ uint64_t x9 = (x2 | x8);
-out[0] = x9;
-}}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[4]) { /* Non-zero test: out[0] receives the bitwise OR of the four limbs of in1, so it is 0 exactly when every limb is 0. The stray `ReturnType` token that preceded the first parameter type was dropped: it is not a C qualifier and no sibling function carries it. */
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x7 = (x6 | x5); /* fold limbs 3 and 2 */
+ { uint64_t x8 = (x4 | x7); /* then limb 1 */
+ { uint64_t x9 = (x2 | x8); /* then limb 0 */
+ out[0] = x9; /* the raw OR is stored; it is not normalised to 0/1 */
+ }}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c b/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c
index 334a368a5..f8f941e71 100644
--- a/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c
@@ -1,38 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8);
-{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
-{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17);
-{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
-{ uint64_t x20 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
-{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
-{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
-{ uint64_t x32 = (x19 & 0xffa7ffffffffffffL);
-{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34);
-out[0] = x34;
-out[1] = x30;
-out[2] = x26;
-out[3] = x22;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void feopp(uint64_t out[4], const uint64_t in1[4]) { /* Negation: computes 0 - in1 over four 64-bit limbs (index 0 least significant), then adds back the constant limbs ff..ff, ff..ff, ff..ff, 0xffa7ffffffffffff (= 2^256 - 88*2^240 - 1) masked by x19. */
+ { const uint64_t x5 = in1[3]; /* top limb */
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; /* bottom limb */
+ { uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); /* 0 - in1, limb by limb with borrow */
+ { uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); /* x18 is the borrow out of the top limb */
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); /* mask; cmovznz is defined elsewhere, presumably 0 when x18 == 0 and all-ones otherwise - confirm */
+ { uint64_t x20 = (x19 & 0xffffffffffffffffL); /* masked constant limb 0 */
+ { uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+ { uint64_t x24 = (x19 & 0xffffffffffffffffL); /* masked constant limb 1 */
+ { uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xffffffffffffffffL); /* masked constant limb 2 */
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0xffa7ffffffffffffL); /* masked constant limb 3 */
+ { uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); /* final carry-out goes to `_` and is never read */
+ out[0] = x22; /* result is written bottom limb first */
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c b/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c
index 79ce81a88..73f919d5d 100644
--- a/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c
@@ -1,38 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17);
-{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26);
-{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL);
-{ uint64_t x29 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
-{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
-{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
-{ uint64_t x41 = (x28 & 0xffa7ffffffffffffL);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesub(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* Subtraction: computes in1 - in2 over four 64-bit limbs (index 0 least significant), then adds back the constant limbs ff..ff, ff..ff, ff..ff, 0xffa7ffffffffffff (= 2^256 - 88*2^240 - 1) masked by x28. */
+ { const uint64_t x8 = in1[3]; /* top limb of in1 */
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; /* bottom limb of in1 */
+ { const uint64_t x14 = in2[3]; /* top limb of in2 */
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0]; /* bottom limb of in2 */
+ { uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); /* in1 - in2, limb by limb with borrow */
+ { uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+ { uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); /* x27 is the borrow out of the top limb */
+ { uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); /* mask; cmovznz is defined elsewhere, presumably 0 when x27 == 0 and all-ones otherwise - confirm */
+ { uint64_t x29 = (x28 & 0xffffffffffffffffL); /* masked constant limb 0 */
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+ { uint64_t x33 = (x28 & 0xffffffffffffffffL); /* masked constant limb 1 */
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+ { uint64_t x37 = (x28 & 0xffffffffffffffffL); /* masked constant limb 2 */
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+ { uint64_t x41 = (x28 & 0xffa7ffffffffffffL); /* masked constant limb 3 */
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); /* final carry-out goes to `_` and is never read */
+ out[0] = x31; /* result is written bottom limb first */
+ out[1] = x35;
+ out[2] = x39;
+ out[3] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e266m3/feadd.c b/src/Specific/montgomery64_2e266m3/feadd.c
index 1d1635437..40fa79633 100644
--- a/src/Specific/montgomery64_2e266m3/feadd.c
+++ b/src/Specific/montgomery64_2e266m3/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33);
-{ uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xfffffffffffffffdL, &x36);
-{ uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
-{ uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
-{ uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x3ff, &x48);
-{ uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_);
-{ uint64_t x53 = cmovznz(x52, x48, x33);
-{ uint64_t x54 = cmovznz(x52, x45, x30);
-{ uint64_t x55 = cmovznz(x52, x42, x27);
-{ uint64_t x56 = cmovznz(x52, x39, x24);
-{ uint64_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { /* out = in1 + in2 over five 64-bit limbs (index 0 least significant), followed by one conditional subtraction of the constant whose limbs are 0xfffffffffffffffd, ff..ff, ff..ff, ff..ff, 0x3ff (= 2^266 - 3, matching the directory name). */
+ { const uint64_t x10 = in1[4]; /* top limb of in1 */
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; /* bottom limb of in1 */
+ { const uint64_t x18 = in2[4]; /* top limb of in2 */
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0]; /* bottom limb of in2 */
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21); /* limb-wise add; each carry-out feeds the next limb */
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33); /* x34 is the carry out of the top limb */
+ { uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xfffffffffffffffdL, &x36); /* trial subtraction of the constant, limb by limb with borrow */
+ { uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
+ { uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
+ { uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
+ { uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x3ff, &x48);
+ { uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_); /* fold the add carry x34 into the borrow; the difference word is discarded, only x52 is used */
+ { uint64_t x53 = cmovznz(x52, x48, x33); /* cmovznz is defined elsewhere; presumably yields the 2nd argument when x52 == 0 and the 3rd otherwise - confirm */
+ { uint64_t x54 = cmovznz(x52, x45, x30);
+ { uint64_t x55 = cmovznz(x52, x42, x27);
+ { uint64_t x56 = cmovznz(x52, x39, x24);
+ { uint64_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57; /* result is written bottom limb first */
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e266m3/femul.c b/src/Specific/montgomery64_2e266m3/femul.c
index 918a3a579..2b0513193 100644
--- a/src/Specific/montgomery64_2e266m3/femul.c
+++ b/src/Specific/montgomery64_2e266m3/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
-{ uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
-{ uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
-{ uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
-{ uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
-{ uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
-{ uint64_t _; uint64_t x51 = _mulx_u64(x21, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xfffffffffffffffdL, &x55);
-{ uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
-{ uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
-{ uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
-{ uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x3ff, &x67);
-{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
-{ uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
-{ uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
-{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
-{ uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
-{ uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
-{ uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
-{ uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
-{ uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
-{ uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
-{ uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
-{ uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
-{ uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
-{ uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
-{ uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
-{ uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
-{ uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
-{ uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
-{ uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
-{ uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
-{ uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
-{ uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
-{ uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
-{ uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
-{ uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
-{ uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
-{ uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
-{ uint64_t _; uint64_t x150 = _mulx_u64(x132, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xfffffffffffffffdL, &x154);
-{ uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x3ff, &x166);
-{ uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
-{ uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
-{ uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
-{ uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
-{ uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
-{ uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
-{ uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
-{ uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
-{ uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
-{ uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
-{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
-{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
-{ uint64_t _; uint64_t x250 = _mulx_u64(x232, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xfffffffffffffffdL, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
-{ uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
-{ uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
-{ uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x3ff, &x266);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
-{ uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
-{ uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
-{ uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
-{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
-{ uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
-{ uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
-{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
-{ uint64_t _; uint64_t x350 = _mulx_u64(x332, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xfffffffffffffffdL, &x354);
-{ uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
-{ uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
-{ uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
-{ uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x3ff, &x366);
-{ uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
-{ uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
-{ uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
-{ uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
-{ uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
-{ uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
-{ uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
-{ uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
-{ uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
-{ uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
-{ uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
-{ uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
-{ uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
-{ uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
-{ uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
-{ uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
-{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
-{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
-{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
-{ uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
-{ uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
-{ uint64_t _; uint64_t x450 = _mulx_u64(x432, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xfffffffffffffffdL, &x454);
-{ uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
-{ uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
-{ uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
-{ uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x3ff, &x466);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
-{ uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
-{ uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
-{ uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
-{ uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
-{ uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
-{ uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
-{ uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
-{ uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
-{ uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xfffffffffffffffdL, &x502);
-{ uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
-{ uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
-{ uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
-{ uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x3ff, &x514);
-{ uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
-{ uint64_t x519 = cmovznz(x518, x514, x498);
-{ uint64_t x520 = cmovznz(x518, x511, x495);
-{ uint64_t x521 = cmovznz(x518, x508, x492);
-{ uint64_t x522 = cmovznz(x518, x505, x489);
-{ uint64_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
+ { uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
+ { uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
+ { uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
+ { uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
+ { uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
+ { uint64_t _; uint64_t x51 = _mulx_u64(x21, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xfffffffffffffffdL, &x55);
+ { uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
+ { uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
+ { uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
+ { uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x3ff, &x67);
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
+ { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
+ { uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
+ { uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
+ { uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
+ { uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
+ { uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
+ { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
+ { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
+ { uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
+ { uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
+ { uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
+ { uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
+ { uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
+ { uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
+ { uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
+ { uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
+ { uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
+ { uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
+ { uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
+ { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
+ { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
+ { uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
+ { uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
+ { uint64_t _; uint64_t x150 = _mulx_u64(x132, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xfffffffffffffffdL, &x154);
+ { uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x3ff, &x166);
+ { uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
+ { uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
+ { uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
+ { uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
+ { uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
+ { uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
+ { uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
+ { uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
+ { uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
+ { uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
+ { uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
+ { uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
+ { uint64_t _; uint64_t x250 = _mulx_u64(x232, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xfffffffffffffffdL, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
+ { uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
+ { uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
+ { uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x3ff, &x266);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
+ { uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
+ { uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
+ { uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
+ { uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
+ { uint64_t _; uint64_t x350 = _mulx_u64(x332, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xfffffffffffffffdL, &x354);
+ { uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
+ { uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
+ { uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
+ { uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x3ff, &x366);
+ { uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
+ { uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
+ { uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
+ { uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
+ { uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
+ { uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
+ { uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
+ { uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
+ { uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
+ { uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
+ { uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
+ { uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
+ { uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
+ { uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
+ { uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
+ { uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
+ { uint64_t _; uint64_t x450 = _mulx_u64(x432, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xfffffffffffffffdL, &x454);
+ { uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
+ { uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
+ { uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
+ { uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x3ff, &x466);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
+ { uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
+ { uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
+ { uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
+ { uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
+ { uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
+ { uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
+ { uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
+ { uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
+ { uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xfffffffffffffffdL, &x502);
+ { uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
+ { uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
+ { uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
+ { uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x3ff, &x514);
+ { uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
+ { uint64_t x519 = cmovznz(x518, x514, x498);
+ { uint64_t x520 = cmovznz(x518, x511, x495);
+ { uint64_t x521 = cmovznz(x518, x508, x492);
+ { uint64_t x522 = cmovznz(x518, x505, x489);
+ { uint64_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e266m3/fenz.c b/src/Specific/montgomery64_2e266m3/fenz.c
index aaabff8a7..0a779af7c 100644
--- a/src/Specific/montgomery64_2e266m3/fenz.c
+++ b/src/Specific/montgomery64_2e266m3/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x9 = (x8 | x7);
-{ uint64_t x10 = (x6 | x9);
-{ uint64_t x11 = (x4 | x10);
-{ uint64_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x9 = (x8 | x7);
+ { uint64_t x10 = (x6 | x9);
+ { uint64_t x11 = (x4 | x10);
+ { uint64_t x12 = (x2 | x11);
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e266m3/feopp.c b/src/Specific/montgomery64_2e266m3/feopp.c
index 4aa4357c9..07873bc8f 100644
--- a/src/Specific/montgomery64_2e266m3/feopp.c
+++ b/src/Specific/montgomery64_2e266m3/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10);
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22);
-{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
-{ uint64_t x25 = (x24 & 0xfffffffffffffffdL);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27);
-{ uint64_t x29 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
-{ uint64_t x33 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
-{ uint64_t x37 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
-{ uint64_t x41 = (x24 & 0x3ff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10);
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0xfffffffffffffffdL);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27);
+ { uint64_t x29 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x3ff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e266m3/fesub.c b/src/Specific/montgomery64_2e266m3/fesub.c
index 56dfeab8d..fa38ecbb3 100644
--- a/src/Specific/montgomery64_2e266m3/fesub.c
+++ b/src/Specific/montgomery64_2e266m3/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
-{ uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
-{ uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33);
-{ uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL);
-{ uint64_t x36 = (x35 & 0xfffffffffffffffdL);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38);
-{ uint64_t x40 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
-{ uint64_t x44 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
-{ uint64_t x48 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
-{ uint64_t x52 = (x35 & 0x3ff);
-{ uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
+ { uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
+ { uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33);
+ { uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL);
+ { uint64_t x36 = (x35 & 0xfffffffffffffffdL);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38);
+ { uint64_t x40 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
+ { uint64_t x44 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
+ { uint64_t x48 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
+ { uint64_t x52 = (x35 & 0x3ff);
+ { uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54);
+ out[0] = x38;
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e285m9/feadd.c b/src/Specific/montgomery64_2e285m9/feadd.c
index cc599a43a..ac5360cf4 100644
--- a/src/Specific/montgomery64_2e285m9/feadd.c
+++ b/src/Specific/montgomery64_2e285m9/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33);
-{ uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xfffffffffffffff7L, &x36);
-{ uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
-{ uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
-{ uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x1fffffff, &x48);
-{ uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_);
-{ uint64_t x53 = cmovznz(x52, x48, x33);
-{ uint64_t x54 = cmovznz(x52, x45, x30);
-{ uint64_t x55 = cmovznz(x52, x42, x27);
-{ uint64_t x56 = cmovznz(x52, x39, x24);
-{ uint64_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21);
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33);
+ { uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xfffffffffffffff7L, &x36);
+ { uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
+ { uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
+ { uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
+ { uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x1fffffff, &x48);
+ { uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_);
+ { uint64_t x53 = cmovznz(x52, x48, x33);
+ { uint64_t x54 = cmovznz(x52, x45, x30);
+ { uint64_t x55 = cmovznz(x52, x42, x27);
+ { uint64_t x56 = cmovznz(x52, x39, x24);
+ { uint64_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57;
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e285m9/femul.c b/src/Specific/montgomery64_2e285m9/femul.c
index efed9d392..e8bd245e2 100644
--- a/src/Specific/montgomery64_2e285m9/femul.c
+++ b/src/Specific/montgomery64_2e285m9/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
-{ uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
-{ uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
-{ uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
-{ uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
-{ uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
-{ uint64_t _; uint64_t x51 = _mulx_u64(x21, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xfffffffffffffff7L, &x55);
-{ uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
-{ uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
-{ uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
-{ uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x1fffffff, &x67);
-{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
-{ uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
-{ uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
-{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
-{ uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
-{ uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
-{ uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
-{ uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
-{ uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
-{ uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
-{ uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
-{ uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
-{ uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
-{ uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
-{ uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
-{ uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
-{ uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
-{ uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
-{ uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
-{ uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
-{ uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
-{ uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
-{ uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
-{ uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
-{ uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
-{ uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
-{ uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
-{ uint64_t _; uint64_t x150 = _mulx_u64(x132, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xfffffffffffffff7L, &x154);
-{ uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x1fffffff, &x166);
-{ uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
-{ uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
-{ uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
-{ uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
-{ uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
-{ uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
-{ uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
-{ uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
-{ uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
-{ uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
-{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
-{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
-{ uint64_t _; uint64_t x250 = _mulx_u64(x232, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xfffffffffffffff7L, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
-{ uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
-{ uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
-{ uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x1fffffff, &x266);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
-{ uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
-{ uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
-{ uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
-{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
-{ uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
-{ uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
-{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
-{ uint64_t _; uint64_t x350 = _mulx_u64(x332, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xfffffffffffffff7L, &x354);
-{ uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
-{ uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
-{ uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
-{ uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x1fffffff, &x366);
-{ uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
-{ uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
-{ uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
-{ uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
-{ uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
-{ uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
-{ uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
-{ uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
-{ uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
-{ uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
-{ uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
-{ uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
-{ uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
-{ uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
-{ uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
-{ uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
-{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
-{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
-{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
-{ uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
-{ uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
-{ uint64_t _; uint64_t x450 = _mulx_u64(x432, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xfffffffffffffff7L, &x454);
-{ uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
-{ uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
-{ uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
-{ uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x1fffffff, &x466);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
-{ uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
-{ uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
-{ uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
-{ uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
-{ uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
-{ uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
-{ uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
-{ uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
-{ uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xfffffffffffffff7L, &x502);
-{ uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
-{ uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
-{ uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
-{ uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x1fffffff, &x514);
-{ uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
-{ uint64_t x519 = cmovznz(x518, x514, x498);
-{ uint64_t x520 = cmovznz(x518, x511, x495);
-{ uint64_t x521 = cmovznz(x518, x508, x492);
-{ uint64_t x522 = cmovznz(x518, x505, x489);
-{ uint64_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
+ { uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
+ { uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
+ { uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
+ { uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
+ { uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
+ { uint64_t _; uint64_t x51 = _mulx_u64(x21, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xfffffffffffffff7L, &x55);
+ { uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
+ { uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
+ { uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
+ { uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x1fffffff, &x67);
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
+ { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
+ { uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
+ { uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
+ { uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
+ { uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
+ { uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
+ { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
+ { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
+ { uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
+ { uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
+ { uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
+ { uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
+ { uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
+ { uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
+ { uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
+ { uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
+ { uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
+ { uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
+ { uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
+ { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
+ { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
+ { uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
+ { uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
+ { uint64_t _; uint64_t x150 = _mulx_u64(x132, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xfffffffffffffff7L, &x154);
+ { uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x1fffffff, &x166);
+ { uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
+ { uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
+ { uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
+ { uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
+ { uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
+ { uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
+ { uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
+ { uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
+ { uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
+ { uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
+ { uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
+ { uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
+ { uint64_t _; uint64_t x250 = _mulx_u64(x232, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xfffffffffffffff7L, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
+ { uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
+ { uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
+ { uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x1fffffff, &x266);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
+ { uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
+ { uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
+ { uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
+ { uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
+ { uint64_t _; uint64_t x350 = _mulx_u64(x332, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xfffffffffffffff7L, &x354);
+ { uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
+ { uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
+ { uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
+ { uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x1fffffff, &x366);
+ { uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
+ { uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
+ { uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
+ { uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
+ { uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
+ { uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
+ { uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
+ { uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
+ { uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
+ { uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
+ { uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
+ { uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
+ { uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
+ { uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
+ { uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
+ { uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
+ { uint64_t _; uint64_t x450 = _mulx_u64(x432, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xfffffffffffffff7L, &x454);
+ { uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
+ { uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
+ { uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
+ { uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x1fffffff, &x466);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
+ { uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
+ { uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
+ { uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
+ { uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
+ { uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
+ { uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
+ { uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
+ { uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
+ { uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xfffffffffffffff7L, &x502);
+ { uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
+ { uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
+ { uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
+ { uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x1fffffff, &x514);
+ { uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
+ { uint64_t x519 = cmovznz(x518, x514, x498);
+ { uint64_t x520 = cmovznz(x518, x511, x495);
+ { uint64_t x521 = cmovznz(x518, x508, x492);
+ { uint64_t x522 = cmovznz(x518, x505, x489);
+ { uint64_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e285m9/fenz.c b/src/Specific/montgomery64_2e285m9/fenz.c
index aaabff8a7..0a779af7c 100644
--- a/src/Specific/montgomery64_2e285m9/fenz.c
+++ b/src/Specific/montgomery64_2e285m9/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x9 = (x8 | x7);
-{ uint64_t x10 = (x6 | x9);
-{ uint64_t x11 = (x4 | x10);
-{ uint64_t x12 = (x2 | x11);
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[5]) { /* fenz: ORs the five 64-bit words of in1 together and stores the result in out[0]; out[0] is 0 exactly when every in1[i] is 0. */
+ { const uint64_t x7 = in1[4]; /* NOTE(review): ReturnType in the signature is not declared anywhere in this chunk and the sibling functions declare out without it -- presumably a generator artifact or a macro supplied by the including file; confirm before compiling. */
+ { const uint64_t x8 = in1[3]; /* the five input words are loaded highest index first */
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x9 = (x8 | x7); /* running bitwise OR, folding in one more word per line */
+ { uint64_t x10 = (x6 | x9);
+ { uint64_t x11 = (x4 | x10);
+ { uint64_t x12 = (x2 | x11); /* x12 = in1[0] | in1[1] | in1[2] | in1[3] | in1[4] */
+ out[0] = x12; /* nonzero iff at least one input word is nonzero */
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e285m9/feopp.c b/src/Specific/montgomery64_2e285m9/feopp.c
index cee3ef770..65409d0ee 100644
--- a/src/Specific/montgomery64_2e285m9/feopp.c
+++ b/src/Specific/montgomery64_2e285m9/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10);
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22);
-{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
-{ uint64_t x25 = (x24 & 0xfffffffffffffff7L);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27);
-{ uint64_t x29 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
-{ uint64_t x33 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
-{ uint64_t x37 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
-{ uint64_t x41 = (x24 & 0x1fffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint64_t out[5], const uint64_t in1[5]) { /* feopp: writes to out the five-limb value (0 - in1), adding the masked constant 2^285 - 9 back in when the subtraction borrows. in1[0] starts the borrow chain, so limb 0 is the least-significant limb. */
+ { const uint64_t x7 = in1[4]; /* inputs are loaded highest index first */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10); /* x10..x22 = 0 - in1 limb by limb, each borrow feeding the next call (assumes _subborrow_u64 is the x86 intrinsic: a - b - borrow_in) */
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22); /* x23 is the borrow out of the top limb */
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); /* mask: NOTE(review) assumes cmovznz(c, a, b) yields a when c == 0 and b otherwise, making x24 all-ones only after a borrow; cmovznz is defined outside this chunk -- confirm */
+ { uint64_t x25 = (x24 & 0xfffffffffffffff7L); /* masked limb 0 of 2^285 - 9 (2^64 - 9) */
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27); /* add the masked constant to the difference, carry chained upward */
+ { uint64_t x29 = (x24 & 0xffffffffffffffffL); /* limbs 1..3 of the constant are all-ones */
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x1fffffff); /* masked top limb of the constant: 2^29 - 1 */
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43); /* the carry out of the top limb is assigned to _ and never read */
+ out[0] = x27; /* results are stored least-significant limb first */
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e285m9/fesub.c b/src/Specific/montgomery64_2e285m9/fesub.c
index 1fe59bbdc..4b23709de 100644
--- a/src/Specific/montgomery64_2e285m9/fesub.c
+++ b/src/Specific/montgomery64_2e285m9/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
-{ uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
-{ uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33);
-{ uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL);
-{ uint64_t x36 = (x35 & 0xfffffffffffffff7L);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38);
-{ uint64_t x40 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
-{ uint64_t x44 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
-{ uint64_t x48 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
-{ uint64_t x52 = (x35 & 0x1fffffff);
-{ uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54);
-out[0] = x54;
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { /* fesub: writes to out the five-limb value (in1 - in2), adding the masked constant 2^285 - 9 back in when the subtraction borrows. Index 0 starts the borrow chain, so limb 0 is the least-significant limb. */
+ { const uint64_t x10 = in1[4]; /* both operands are loaded highest index first */
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21); /* x21..x33 = in1 - in2 limb by limb, each borrow feeding the next call (assumes _subborrow_u64 is the x86 intrinsic: a - b - borrow_in) */
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
+ { uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
+ { uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33); /* x34 is the borrow out of the top limb */
+ { uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL); /* mask: NOTE(review) assumes cmovznz(c, a, b) yields a when c == 0 and b otherwise, making x35 all-ones only after a borrow; cmovznz is defined outside this chunk -- confirm */
+ { uint64_t x36 = (x35 & 0xfffffffffffffff7L); /* masked limb 0 of 2^285 - 9 (2^64 - 9) */
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38); /* add the masked constant to the difference, carry chained upward */
+ { uint64_t x40 = (x35 & 0xffffffffffffffffL); /* limbs 1..3 of the constant are all-ones */
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
+ { uint64_t x44 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
+ { uint64_t x48 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
+ { uint64_t x52 = (x35 & 0x1fffffff); /* masked top limb of the constant: 2^29 - 1 */
+ { uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54); /* the carry out of the top limb is assigned to _ and never read */
+ out[0] = x38; /* results are stored least-significant limb first */
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e291m19/feadd.c b/src/Specific/montgomery64_2e291m19/feadd.c
index 382d7aea0..7962323ed 100644
--- a/src/Specific/montgomery64_2e291m19/feadd.c
+++ b/src/Specific/montgomery64_2e291m19/feadd.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21);
-{ uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
-{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33);
-{ uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xffffffffffffffedL, &x36);
-{ uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
-{ uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
-{ uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
-{ uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x7ffffffff, &x48);
-{ uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_);
-{ uint64_t x53 = cmovznz(x52, x48, x33);
-{ uint64_t x54 = cmovznz(x52, x45, x30);
-{ uint64_t x55 = cmovznz(x52, x42, x27);
-{ uint64_t x56 = cmovznz(x52, x39, x24);
-{ uint64_t x57 = cmovznz(x52, x36, x21);
-out[0] = x53;
-out[1] = x54;
-out[2] = x55;
-out[3] = x56;
-out[4] = x57;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feadd(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { /* feadd: five-limb addition of in1 and in2 followed by one conditional subtraction of the constant 2^291 - 19. Index 0 starts the carry chain, so limb 0 is the least-significant limb. */
+ { const uint64_t x10 = in1[4]; /* both operands are loaded highest index first */
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21); /* x21..x33 = in1 + in2 limb by limb, each carry feeding the next call (assumes _addcarryx_u64 is the x86 intrinsic: a + b + carry_in) */
+ { uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33); /* x34 is the carry out of the top limb */
+ { uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xffffffffffffffedL, &x36); /* x36..x48 = sum - (2^291 - 19); limb 0 of the constant is 2^64 - 19 */
+ { uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39); /* limbs 1..3 of the constant are all-ones */
+ { uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
+ { uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
+ { uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x7ffffffff, &x48); /* top limb of the constant: 2^35 - 1 */
+ { uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_); /* extends the borrow chain through the addition's carry x34; only the final borrow x52 is kept, the difference word goes to _ and is never read */
+ { uint64_t x53 = cmovznz(x52, x48, x33); /* per limb, choose between the subtracted value and the raw sum based on x52 -- NOTE(review): assumes cmovznz(c, a, b) yields a when c == 0 and b otherwise; cmovznz is defined outside this chunk -- confirm */
+ { uint64_t x54 = cmovznz(x52, x45, x30);
+ { uint64_t x55 = cmovznz(x52, x42, x27);
+ { uint64_t x56 = cmovznz(x52, x39, x24);
+ { uint64_t x57 = cmovznz(x52, x36, x21);
+ out[0] = x57; /* results are stored least-significant limb first */
+ out[1] = x56;
+ out[2] = x55;
+ out[3] = x54;
+ out[4] = x53;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e291m19/femul.c b/src/Specific/montgomery64_2e291m19/femul.c
index ca888aa92..5432d7124 100644
--- a/src/Specific/montgomery64_2e291m19/femul.c
+++ b/src/Specific/montgomery64_2e291m19/femul.c
@@ -1,200 +1,192 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
-{ uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
-{ uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
-{ uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
-{ uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
-{ uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
-{ uint64_t _; uint64_t x51 = _mulx_u64(x21, 0x86bca1af286bca1bL, &_);
-{ uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xffffffffffffffedL, &x55);
-{ uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
-{ uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
-{ uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
-{ uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x7ffffffff, &x67);
-{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
-{ uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
-{ uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
-{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
-{ uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
-{ uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
-{ uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
-{ uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
-{ uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
-{ uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
-{ uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
-{ uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
-{ uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
-{ uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
-{ uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
-{ uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
-{ uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
-{ uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
-{ uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
-{ uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
-{ uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
-{ uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
-{ uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
-{ uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
-{ uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
-{ uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
-{ uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
-{ uint64_t _; uint64_t x150 = _mulx_u64(x132, 0x86bca1af286bca1bL, &_);
-{ uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xffffffffffffffedL, &x154);
-{ uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
-{ uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
-{ uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
-{ uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x7ffffffff, &x166);
-{ uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
-{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
-{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
-{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
-{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
-{ uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
-{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
-{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
-{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
-{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
-{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
-{ uint8_t x200 = (x199 + x148);
-{ uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
-{ uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
-{ uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
-{ uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
-{ uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
-{ uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
-{ uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
-{ uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
-{ uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
-{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
-{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
-{ uint64_t _; uint64_t x250 = _mulx_u64(x232, 0x86bca1af286bca1bL, &_);
-{ uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xffffffffffffffedL, &x254);
-{ uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
-{ uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
-{ uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
-{ uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x7ffffffff, &x266);
-{ uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
-{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
-{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
-{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
-{ uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
-{ uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
-{ uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
-{ uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
-{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
-{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
-{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
-{ uint8_t x300 = (x299 + x248);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
-{ uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
-{ uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
-{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
-{ uint64_t _; uint64_t x350 = _mulx_u64(x332, 0x86bca1af286bca1bL, &_);
-{ uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xffffffffffffffedL, &x354);
-{ uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
-{ uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
-{ uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
-{ uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x7ffffffff, &x366);
-{ uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
-{ uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
-{ uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
-{ uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
-{ uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
-{ uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
-{ uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
-{ uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
-{ uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
-{ uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
-{ uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
-{ uint8_t x400 = (x399 + x348);
-{ uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
-{ uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
-{ uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
-{ uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
-{ uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
-{ uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
-{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
-{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
-{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
-{ uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
-{ uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
-{ uint64_t _; uint64_t x450 = _mulx_u64(x432, 0x86bca1af286bca1bL, &_);
-{ uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xffffffffffffffedL, &x454);
-{ uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
-{ uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
-{ uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
-{ uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x7ffffffff, &x466);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
-{ uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
-{ uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
-{ uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
-{ uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
-{ uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
-{ uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
-{ uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
-{ uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
-{ uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
-{ uint8_t x500 = (x499 + x448);
-{ uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xffffffffffffffedL, &x502);
-{ uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
-{ uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
-{ uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
-{ uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x7ffffffff, &x514);
-{ uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
-{ uint64_t x519 = cmovznz(x518, x514, x498);
-{ uint64_t x520 = cmovznz(x518, x511, x495);
-{ uint64_t x521 = cmovznz(x518, x508, x492);
-{ uint64_t x522 = cmovznz(x518, x505, x489);
-{ uint64_t x523 = cmovznz(x518, x502, x486);
-out[0] = x519;
-out[1] = x520;
-out[2] = x521;
-out[3] = x522;
-out[4] = x523;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
+ { uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
+ { uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
+ { uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
+ { uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
+ { uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
+ { uint64_t _; uint64_t x51 = _mulx_u64(x21, 0x86bca1af286bca1bL, &_);
+ { uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xffffffffffffffedL, &x55);
+ { uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
+ { uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
+ { uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
+ { uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x7ffffffff, &x67);
+ { uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
+ { uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
+ { uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
+ { uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
+ { uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
+ { uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
+ { uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
+ { uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
+ { uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
+ { uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
+ { uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
+ { uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
+ { uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
+ { uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
+ { uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
+ { uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
+ { uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
+ { uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
+ { uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
+ { uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
+ { uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
+ { uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
+ { uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
+ { uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
+ { uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
+ { uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
+ { uint64_t _; uint64_t x150 = _mulx_u64(x132, 0x86bca1af286bca1bL, &_);
+ { uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xffffffffffffffedL, &x154);
+ { uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
+ { uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
+ { uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
+ { uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x7ffffffff, &x166);
+ { uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
+ { uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
+ { uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
+ { uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
+ { uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
+ { uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
+ { uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
+ { uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
+ { uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
+ { uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
+ { uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
+ { uint8_t x200 = (x199 + x148);
+ { uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
+ { uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
+ { uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
+ { uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
+ { uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
+ { uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
+ { uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
+ { uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
+ { uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
+ { uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
+ { uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
+ { uint64_t _; uint64_t x250 = _mulx_u64(x232, 0x86bca1af286bca1bL, &_);
+ { uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xffffffffffffffedL, &x254);
+ { uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
+ { uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
+ { uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
+ { uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x7ffffffff, &x266);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
+ { uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
+ { uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
+ { uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
+ { uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
+ { uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
+ { uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
+ { uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
+ { uint8_t x300 = (x299 + x248);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
+ { uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
+ { uint64_t _; uint64_t x350 = _mulx_u64(x332, 0x86bca1af286bca1bL, &_);
+ { uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xffffffffffffffedL, &x354);
+ { uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
+ { uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
+ { uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
+ { uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x7ffffffff, &x366);
+ { uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
+ { uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
+ { uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
+ { uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
+ { uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
+ { uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
+ { uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
+ { uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
+ { uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
+ { uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
+ { uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
+ { uint8_t x400 = (x399 + x348);
+ { uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
+ { uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
+ { uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
+ { uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
+ { uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
+ { uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
+ { uint64_t _; uint64_t x450 = _mulx_u64(x432, 0x86bca1af286bca1bL, &_);
+ { uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xffffffffffffffedL, &x454);
+ { uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
+ { uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
+ { uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
+ { uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x7ffffffff, &x466);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
+ { uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
+ { uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
+ { uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
+ { uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
+ { uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
+ { uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
+ { uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
+ { uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
+ { uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
+ { uint8_t x500 = (x499 + x448);
+ { uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xffffffffffffffedL, &x502);
+ { uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
+ { uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
+ { uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
+ { uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x7ffffffff, &x514);
+ { uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
+ { uint64_t x519 = cmovznz(x518, x514, x498);
+ { uint64_t x520 = cmovznz(x518, x511, x495);
+ { uint64_t x521 = cmovznz(x518, x508, x492);
+ { uint64_t x522 = cmovznz(x518, x505, x489);
+ { uint64_t x523 = cmovznz(x518, x502, x486);
+ out[0] = x523;
+ out[1] = x522;
+ out[2] = x521;
+ out[3] = x520;
+ out[4] = x519;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e291m19/fenz.c b/src/Specific/montgomery64_2e291m19/fenz.c
index aaabff8a7..0a779af7c 100644
--- a/src/Specific/montgomery64_2e291m19/fenz.c
+++ b/src/Specific/montgomery64_2e291m19/fenz.c
@@ -1,26 +1,13 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) /* Pre-change form: takes the five words as scalar arguments and stores their bitwise OR in out[0]. */
-{ uint64_t x9 = (x8 | x7); /* fold the words together one at a time */
-{ uint64_t x10 = (x6 | x9);
-{ uint64_t x11 = (x4 | x10);
-{ uint64_t x12 = (x2 | x11); /* x12 = x2 | x4 | x6 | x8 | x7; zero exactly when every argument is zero */
-out[0] = x12;
-}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[5]) { /* Stores the bitwise OR of the five words of in1 in out[0]; the result is zero exactly when every input word is zero. NOTE(review): ReturnType is not defined in this view -- presumably a macro supplied by the including file; confirm. */
+ { const uint64_t x7 = in1[4]; /* load the five input words into locals */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x9 = (x8 | x7); /* fold the words together one at a time */
+ { uint64_t x10 = (x6 | x9);
+ { uint64_t x11 = (x4 | x10);
+ { uint64_t x12 = (x2 | x11); /* x12 = in1[0] | in1[1] | in1[2] | in1[3] | in1[4] */
+ out[0] = x12;
+ }}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e291m19/feopp.c b/src/Specific/montgomery64_2e291m19/feopp.c
index 34d8efd27..88987b5fd 100644
--- a/src/Specific/montgomery64_2e291m19/feopp.c
+++ b/src/Specific/montgomery64_2e291m19/feopp.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) /* Pre-change form: computes 0 minus the five-word value (x2 is the first word of the borrow chain, x7 the last), then adds a masked copy of the constant 2^291 - 19. */
-{ uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10); /* borrow chain starts here with borrow-in 0 */
-{ uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22); /* x23 is the borrow out of the last word */
-{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); /* NOTE(review): cmovznz is defined elsewhere; presumably yields 0 when x23 == 0 and all-ones otherwise, so x24 acts as a mask -- confirm */
-{ uint64_t x25 = (x24 & 0xffffffffffffffedL); /* the five AND-ed constants (this one first) are the 64-bit words of 2^291 - 19 */
-{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27); /* carry chain starts here with carry-in 0 */
-{ uint64_t x29 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
-{ uint64_t x33 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
-{ uint64_t x37 = (x24 & 0xffffffffffffffffL);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
-{ uint64_t x41 = (x24 & 0x7ffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43); /* final carry-out is stored in _ and never read */
-out[0] = x43; /* this version stores the last word of the carry chain first */
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void feopp(uint64_t out[5], const uint64_t in1[5]) { /* Computes 0 - in1 over five 64-bit words with a borrow chain, then adds a masked copy of the constant 2^291 - 19; out[0] receives the first word of the carry chain, out[4] the last. */
+ { const uint64_t x7 = in1[4]; /* load the five input words into locals */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10); /* borrow chain starts at in1[0] with borrow-in 0 */
+ { uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22); /* x23 is the borrow out of the in1[4] word */
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); /* NOTE(review): cmovznz is defined elsewhere; presumably yields 0 when x23 == 0 and all-ones otherwise, so x24 acts as a mask -- confirm */
+ { uint64_t x25 = (x24 & 0xffffffffffffffedL); /* the five AND-ed constants (this one first) are the 64-bit words of 2^291 - 19, matching the directory name */
+ { uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27); /* carry chain starts here with carry-in 0 */
+ { uint64_t x29 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0xffffffffffffffffL);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7ffffffff); /* top word of the constant: 35 one-bits */
+ { uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43); /* final carry-out is stored in _ and never read */
+ out[0] = x27; /* words are stored in carry-chain order, matching the in1[0]..in1[4] indexing */
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e291m19/fesub.c b/src/Specific/montgomery64_2e291m19/fesub.c
index e45349aea..bc9a9496c 100644
--- a/src/Specific/montgomery64_2e291m19/fesub.c
+++ b/src/Specific/montgomery64_2e291m19/fesub.c
@@ -1,42 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) /* Pre-change form: subtracts the second five-word value (x13..x18) from the first (x5..x10) with a borrow chain, then adds a masked copy of the constant 2^291 - 19. */
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21); /* borrow chain starts here with borrow-in 0 */
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
-{ uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
-{ uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33); /* x34 is the borrow out of the last word */
-{ uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL); /* NOTE(review): cmovznz is defined elsewhere; presumably yields 0 when x34 == 0 and all-ones otherwise, so x35 acts as a mask -- confirm */
-{ uint64_t x36 = (x35 & 0xffffffffffffffedL); /* the five AND-ed constants (this one first) are the 64-bit words of 2^291 - 19 */
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38); /* carry chain starts here with carry-in 0 */
-{ uint64_t x40 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
-{ uint64_t x44 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
-{ uint64_t x48 = (x35 & 0xffffffffffffffffL);
-{ uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
-{ uint64_t x52 = (x35 & 0x7ffffffff);
-{ uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54); /* final carry-out is stored in _ and never read */
-out[0] = x54; /* this version stores the last word of the carry chain first */
-out[1] = x50;
-out[2] = x46;
-out[3] = x42;
-out[4] = x38;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesub(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { /* Computes in1 - in2 over five 64-bit words with a borrow chain, then adds a masked copy of the constant 2^291 - 19; out[0] receives the first word of the carry chain, out[4] the last. */
+ { const uint64_t x10 = in1[4]; /* load the words of the minuend in1 */
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4]; /* load the words of the subtrahend in2 */
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21); /* borrow chain starts at index 0 with borrow-in 0 */
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
+ { uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
+ { uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33); /* x34 is the borrow out of the index-4 word */
+ { uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL); /* NOTE(review): cmovznz is defined elsewhere; presumably yields 0 when x34 == 0 and all-ones otherwise, so x35 acts as a mask -- confirm */
+ { uint64_t x36 = (x35 & 0xffffffffffffffedL); /* the five AND-ed constants (this one first) are the 64-bit words of 2^291 - 19, matching the directory name */
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38); /* carry chain starts here with carry-in 0 */
+ { uint64_t x40 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
+ { uint64_t x44 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
+ { uint64_t x48 = (x35 & 0xffffffffffffffffL);
+ { uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
+ { uint64_t x52 = (x35 & 0x7ffffffff); /* top word of the constant: 35 one-bits */
+ { uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54); /* final carry-out is stored in _ and never read */
+ out[0] = x38; /* words are stored in carry-chain order, matching the input indexing */
+ out[1] = x42;
+ out[2] = x46;
+ out[3] = x50;
+ out[4] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e321m9/feadd.c b/src/Specific/montgomery64_2e321m9/feadd.c
index 28d3c78e3..71ed9e56d 100644
--- a/src/Specific/montgomery64_2e321m9/feadd.c
+++ b/src/Specific/montgomery64_2e321m9/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) /* Pre-change form: adds two six-word values passed as scalars (x5..x12 and x15..x22), subtracts the constant 2^321 - 9 from the sum, and picks one of the two results word by word with cmovznz. */
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); /* carry chain starts here with carry-in 0 */
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); /* x41 is the carry out of the six-word sum */
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffff7L, &x43); /* the six subtracted constants (this one first) are the 64-bit words of 2^321 - 9 */
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x1, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); /* extends the borrow chain through the carry bit x41; only the borrow x62 is kept */
-{ uint64_t x63 = cmovznz(x62, x58, x40); /* NOTE(review): cmovznz is defined elsewhere; presumably returns the difference word when x62 == 0 and the raw sum word otherwise -- confirm */
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63; /* this version stores the last word of the carry chain first */
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { /* Adds in1 and in2 as six-word values with a carry chain, subtracts the constant 2^321 - 9 from the sum, and picks one of the two results word by word with cmovznz; out[0] receives the first word of the chain, out[5] the last. */
+ { const uint64_t x12 = in1[5]; /* load the words of in1 */
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5]; /* load the words of in2 */
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); /* carry chain starts at index 0 with carry-in 0 */
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); /* x41 is the carry out of the six-word sum */
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffff7L, &x43); /* the six subtracted constants (this one first) are the 64-bit words of 2^321 - 9, matching the directory name */
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x1, &x58); /* top word of the constant is 1 */
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); /* extends the borrow chain through the carry bit x41; only the borrow x62 is kept */
+ { uint64_t x63 = cmovznz(x62, x58, x40); /* NOTE(review): cmovznz is defined elsewhere; presumably returns the difference word when x62 == 0 and the raw sum word otherwise -- confirm */
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68; /* words are stored in carry-chain order, matching the input indexing */
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e321m9/femul.c b/src/Specific/montgomery64_2e321m9/femul.c
index 07a92880a..d5c7ea9d1 100644
--- a/src/Specific/montgomery64_2e321m9/femul.c
+++ b/src/Specific/montgomery64_2e321m9/femul.c
@@ -1,260 +1,254 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffff7L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x65, x67, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x68, x70, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x71, x73, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x74, x76, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x77, x61, &x91);
-{ uint64_t _; uint8_t x95 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x97; uint8_t x98 = _addcarryx_u64(x95, x43, x79, &x97);
-{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x46, x82, &x100);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x49, x85, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x52, x88, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x55, x91, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x58, x92, &x112);
-{ uint64_t x116; uint64_t x115 = _mulx_u64(x7, x15, &x116);
-{ uint64_t x119; uint64_t x118 = _mulx_u64(x7, x17, &x119);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x19, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x21, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x23, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x22, &x131);
-{ uint64_t x133; uint8_t x134 = _addcarryx_u64(0x0, x116, x118, &x133);
-{ uint64_t x136; uint8_t x137 = _addcarryx_u64(x134, x119, x121, &x136);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t _ = _addcarryx_u64(0x0, x146, x131, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(0x0, x97, x115, &x151);
-{ uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x100, x133, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x113, x148, &x169);
-{ uint64_t _; uint64_t x172 = _mulx_u64(x151, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x176; uint64_t x175 = _mulx_u64(x172, 0xfffffffffffffff7L, &x176);
-{ uint64_t x179; uint64_t x178 = _mulx_u64(x172, 0xffffffffffffffffL, &x179);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x172, 0xffffffffffffffffL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x172, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x172, 0xffffffffffffffffL, &x188);
-{ uint64_t x190; uint8_t x191 = _addcarryx_u64(0x0, x176, x178, &x190);
-{ uint64_t x193; uint8_t x194 = _addcarryx_u64(x191, x179, x181, &x193);
-{ uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x182, x184, &x196);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x185, x187, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x188, x172, &x202);
-{ uint64_t _; uint8_t x206 = _addcarryx_u64(0x0, x151, x175, &_);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x154, x190, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x157, x193, &x211);
-{ uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x160, x196, &x214);
-{ uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x163, x199, &x217);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x166, x202, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x169, x203, &x223);
-{ uint8_t x225 = (x224 + x170);
-{ uint64_t x228; uint64_t x227 = _mulx_u64(x9, x15, &x228);
-{ uint64_t x231; uint64_t x230 = _mulx_u64(x9, x17, &x231);
-{ uint64_t x234; uint64_t x233 = _mulx_u64(x9, x19, &x234);
-{ uint64_t x237; uint64_t x236 = _mulx_u64(x9, x21, &x237);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x23, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x22, &x243);
-{ uint64_t x245; uint8_t x246 = _addcarryx_u64(0x0, x228, x230, &x245);
-{ uint64_t x248; uint8_t x249 = _addcarryx_u64(x246, x231, x233, &x248);
-{ uint64_t x251; uint8_t x252 = _addcarryx_u64(x249, x234, x236, &x251);
-{ uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
-{ uint64_t x260; uint8_t _ = _addcarryx_u64(0x0, x258, x243, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(0x0, x208, x227, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x211, x245, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x214, x248, &x269);
-{ uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x225, x260, &x281);
-{ uint64_t _; uint64_t x284 = _mulx_u64(x263, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x288; uint64_t x287 = _mulx_u64(x284, 0xfffffffffffffff7L, &x288);
-{ uint64_t x291; uint64_t x290 = _mulx_u64(x284, 0xffffffffffffffffL, &x291);
-{ uint64_t x294; uint64_t x293 = _mulx_u64(x284, 0xffffffffffffffffL, &x294);
-{ uint64_t x297; uint64_t x296 = _mulx_u64(x284, 0xffffffffffffffffL, &x297);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x284, 0xffffffffffffffffL, &x300);
-{ uint64_t x302; uint8_t x303 = _addcarryx_u64(0x0, x288, x290, &x302);
-{ uint64_t x305; uint8_t x306 = _addcarryx_u64(x303, x291, x293, &x305);
-{ uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x294, x296, &x308);
-{ uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x297, x299, &x311);
-{ uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x300, x284, &x314);
-{ uint64_t _; uint8_t x318 = _addcarryx_u64(0x0, x263, x287, &_);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x266, x302, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x269, x305, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x272, x308, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x275, x311, &x329);
-{ uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x278, x314, &x332);
-{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x281, x315, &x335);
-{ uint8_t x337 = (x336 + x282);
-{ uint64_t x340; uint64_t x339 = _mulx_u64(x11, x15, &x340);
-{ uint64_t x343; uint64_t x342 = _mulx_u64(x11, x17, &x343);
-{ uint64_t x346; uint64_t x345 = _mulx_u64(x11, x19, &x346);
-{ uint64_t x349; uint64_t x348 = _mulx_u64(x11, x21, &x349);
-{ uint64_t x352; uint64_t x351 = _mulx_u64(x11, x23, &x352);
-{ uint64_t x355; uint64_t x354 = _mulx_u64(x11, x22, &x355);
-{ uint64_t x357; uint8_t x358 = _addcarryx_u64(0x0, x340, x342, &x357);
-{ uint64_t x360; uint8_t x361 = _addcarryx_u64(x358, x343, x345, &x360);
-{ uint64_t x363; uint8_t x364 = _addcarryx_u64(x361, x346, x348, &x363);
-{ uint64_t x366; uint8_t x367 = _addcarryx_u64(x364, x349, x351, &x366);
-{ uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
-{ uint64_t x372; uint8_t _ = _addcarryx_u64(0x0, x370, x355, &x372);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x320, x339, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x323, x357, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x326, x360, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x329, x363, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
-{ uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x337, x372, &x393);
-{ uint64_t _; uint64_t x396 = _mulx_u64(x375, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x400; uint64_t x399 = _mulx_u64(x396, 0xfffffffffffffff7L, &x400);
-{ uint64_t x403; uint64_t x402 = _mulx_u64(x396, 0xffffffffffffffffL, &x403);
-{ uint64_t x406; uint64_t x405 = _mulx_u64(x396, 0xffffffffffffffffL, &x406);
-{ uint64_t x409; uint64_t x408 = _mulx_u64(x396, 0xffffffffffffffffL, &x409);
-{ uint64_t x412; uint64_t x411 = _mulx_u64(x396, 0xffffffffffffffffL, &x412);
-{ uint64_t x414; uint8_t x415 = _addcarryx_u64(0x0, x400, x402, &x414);
-{ uint64_t x417; uint8_t x418 = _addcarryx_u64(x415, x403, x405, &x417);
-{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
-{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
-{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x396, &x426);
-{ uint64_t _; uint8_t x430 = _addcarryx_u64(0x0, x375, x399, &_);
-{ uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x378, x414, &x432);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x381, x417, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x384, x420, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x387, x423, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x390, x426, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x393, x427, &x447);
-{ uint8_t x449 = (x448 + x394);
-{ uint64_t x452; uint64_t x451 = _mulx_u64(x13, x15, &x452);
-{ uint64_t x455; uint64_t x454 = _mulx_u64(x13, x17, &x455);
-{ uint64_t x458; uint64_t x457 = _mulx_u64(x13, x19, &x458);
-{ uint64_t x461; uint64_t x460 = _mulx_u64(x13, x21, &x461);
-{ uint64_t x464; uint64_t x463 = _mulx_u64(x13, x23, &x464);
-{ uint64_t x467; uint64_t x466 = _mulx_u64(x13, x22, &x467);
-{ uint64_t x469; uint8_t x470 = _addcarryx_u64(0x0, x452, x454, &x469);
-{ uint64_t x472; uint8_t x473 = _addcarryx_u64(x470, x455, x457, &x472);
-{ uint64_t x475; uint8_t x476 = _addcarryx_u64(x473, x458, x460, &x475);
-{ uint64_t x478; uint8_t x479 = _addcarryx_u64(x476, x461, x463, &x478);
-{ uint64_t x481; uint8_t x482 = _addcarryx_u64(x479, x464, x466, &x481);
-{ uint64_t x484; uint8_t _ = _addcarryx_u64(0x0, x482, x467, &x484);
-{ uint64_t x487; uint8_t x488 = _addcarryx_u64(0x0, x432, x451, &x487);
-{ uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x435, x469, &x490);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x438, x472, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x441, x475, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x444, x478, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x449, x484, &x505);
-{ uint64_t _; uint64_t x508 = _mulx_u64(x487, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x512; uint64_t x511 = _mulx_u64(x508, 0xfffffffffffffff7L, &x512);
-{ uint64_t x515; uint64_t x514 = _mulx_u64(x508, 0xffffffffffffffffL, &x515);
-{ uint64_t x518; uint64_t x517 = _mulx_u64(x508, 0xffffffffffffffffL, &x518);
-{ uint64_t x521; uint64_t x520 = _mulx_u64(x508, 0xffffffffffffffffL, &x521);
-{ uint64_t x524; uint64_t x523 = _mulx_u64(x508, 0xffffffffffffffffL, &x524);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(0x0, x512, x514, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x515, x517, &x529);
-{ uint64_t x532; uint8_t x533 = _addcarryx_u64(x530, x518, x520, &x532);
-{ uint64_t x535; uint8_t x536 = _addcarryx_u64(x533, x521, x523, &x535);
-{ uint64_t x538; uint8_t x539 = _addcarryx_u64(x536, x524, x508, &x538);
-{ uint64_t _; uint8_t x542 = _addcarryx_u64(0x0, x487, x511, &_);
-{ uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x490, x526, &x544);
-{ uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x493, x529, &x547);
-{ uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x496, x532, &x550);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(x551, x499, x535, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x502, x538, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x505, x539, &x559);
-{ uint8_t x561 = (x560 + x506);
-{ uint64_t x564; uint64_t x563 = _mulx_u64(x12, x15, &x564);
-{ uint64_t x567; uint64_t x566 = _mulx_u64(x12, x17, &x567);
-{ uint64_t x570; uint64_t x569 = _mulx_u64(x12, x19, &x570);
-{ uint64_t x573; uint64_t x572 = _mulx_u64(x12, x21, &x573);
-{ uint64_t x576; uint64_t x575 = _mulx_u64(x12, x23, &x576);
-{ uint64_t x579; uint64_t x578 = _mulx_u64(x12, x22, &x579);
-{ uint64_t x581; uint8_t x582 = _addcarryx_u64(0x0, x564, x566, &x581);
-{ uint64_t x584; uint8_t x585 = _addcarryx_u64(x582, x567, x569, &x584);
-{ uint64_t x587; uint8_t x588 = _addcarryx_u64(x585, x570, x572, &x587);
-{ uint64_t x590; uint8_t x591 = _addcarryx_u64(x588, x573, x575, &x590);
-{ uint64_t x593; uint8_t x594 = _addcarryx_u64(x591, x576, x578, &x593);
-{ uint64_t x596; uint8_t _ = _addcarryx_u64(0x0, x594, x579, &x596);
-{ uint64_t x599; uint8_t x600 = _addcarryx_u64(0x0, x544, x563, &x599);
-{ uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x547, x581, &x602);
-{ uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x550, x584, &x605);
-{ uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x553, x587, &x608);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(x609, x556, x590, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x559, x593, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x561, x596, &x617);
-{ uint64_t _; uint64_t x620 = _mulx_u64(x599, 0x8e38e38e38e38e39L, &_);
-{ uint64_t x624; uint64_t x623 = _mulx_u64(x620, 0xfffffffffffffff7L, &x624);
-{ uint64_t x627; uint64_t x626 = _mulx_u64(x620, 0xffffffffffffffffL, &x627);
-{ uint64_t x630; uint64_t x629 = _mulx_u64(x620, 0xffffffffffffffffL, &x630);
-{ uint64_t x633; uint64_t x632 = _mulx_u64(x620, 0xffffffffffffffffL, &x633);
-{ uint64_t x636; uint64_t x635 = _mulx_u64(x620, 0xffffffffffffffffL, &x636);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(0x0, x624, x626, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x627, x629, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x630, x632, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x633, x635, &x647);
-{ uint64_t x650; uint8_t x651 = _addcarryx_u64(x648, x636, x620, &x650);
-{ uint64_t _; uint8_t x654 = _addcarryx_u64(0x0, x599, x623, &_);
-{ uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x602, x638, &x656);
-{ uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x605, x641, &x659);
-{ uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x608, x644, &x662);
-{ uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x611, x647, &x665);
-{ uint64_t x668; uint8_t x669 = _addcarryx_u64(x666, x614, x650, &x668);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(x669, x617, x651, &x671);
-{ uint8_t x673 = (x672 + x618);
-{ uint64_t x675; uint8_t x676 = _subborrow_u64(0x0, x656, 0xfffffffffffffff7L, &x675);
-{ uint64_t x678; uint8_t x679 = _subborrow_u64(x676, x659, 0xffffffffffffffffL, &x678);
-{ uint64_t x681; uint8_t x682 = _subborrow_u64(x679, x662, 0xffffffffffffffffL, &x681);
-{ uint64_t x684; uint8_t x685 = _subborrow_u64(x682, x665, 0xffffffffffffffffL, &x684);
-{ uint64_t x687; uint8_t x688 = _subborrow_u64(x685, x668, 0xffffffffffffffffL, &x687);
-{ uint64_t x690; uint8_t x691 = _subborrow_u64(x688, x671, 0x1, &x690);
-{ uint64_t _; uint8_t x694 = _subborrow_u64(x691, x673, 0x0, &_);
-{ uint64_t x695 = cmovznz(x694, x690, x671);
-{ uint64_t x696 = cmovznz(x694, x687, x668);
-{ uint64_t x697 = cmovznz(x694, x684, x665);
-{ uint64_t x698 = cmovznz(x694, x681, x662);
-{ uint64_t x699 = cmovznz(x694, x678, x659);
-{ uint64_t x700 = cmovznz(x694, x675, x656);
-out[0] = x695;
-out[1] = x696;
-out[2] = x697;
-out[3] = x698;
-out[4] = x699;
-out[5] = x700;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffff7L, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x65, x67, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x68, x70, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x71, x73, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x74, x76, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x77, x61, &x91);
+ { uint64_t _; uint8_t x95 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x97; uint8_t x98 = _addcarryx_u64(x95, x43, x79, &x97);
+ { uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x46, x82, &x100);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x49, x85, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x52, x88, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x55, x91, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x58, x92, &x112);
+ { uint64_t x116; uint64_t x115 = _mulx_u64(x7, x15, &x116);
+ { uint64_t x119; uint64_t x118 = _mulx_u64(x7, x17, &x119);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x19, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x21, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x23, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x22, &x131);
+ { uint64_t x133; uint8_t x134 = _addcarryx_u64(0x0, x116, x118, &x133);
+ { uint64_t x136; uint8_t x137 = _addcarryx_u64(x134, x119, x121, &x136);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t _ = _addcarryx_u64(0x0, x146, x131, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(0x0, x97, x115, &x151);
+ { uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x100, x133, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x113, x148, &x169);
+ { uint64_t _; uint64_t x172 = _mulx_u64(x151, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x176; uint64_t x175 = _mulx_u64(x172, 0xfffffffffffffff7L, &x176);
+ { uint64_t x179; uint64_t x178 = _mulx_u64(x172, 0xffffffffffffffffL, &x179);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x172, 0xffffffffffffffffL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x172, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x172, 0xffffffffffffffffL, &x188);
+ { uint64_t x190; uint8_t x191 = _addcarryx_u64(0x0, x176, x178, &x190);
+ { uint64_t x193; uint8_t x194 = _addcarryx_u64(x191, x179, x181, &x193);
+ { uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x182, x184, &x196);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x185, x187, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x188, x172, &x202);
+ { uint64_t _; uint8_t x206 = _addcarryx_u64(0x0, x151, x175, &_);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x154, x190, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x157, x193, &x211);
+ { uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x160, x196, &x214);
+ { uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x163, x199, &x217);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x166, x202, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x169, x203, &x223);
+ { uint8_t x225 = (x224 + x170);
+ { uint64_t x228; uint64_t x227 = _mulx_u64(x9, x15, &x228);
+ { uint64_t x231; uint64_t x230 = _mulx_u64(x9, x17, &x231);
+ { uint64_t x234; uint64_t x233 = _mulx_u64(x9, x19, &x234);
+ { uint64_t x237; uint64_t x236 = _mulx_u64(x9, x21, &x237);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x23, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x22, &x243);
+ { uint64_t x245; uint8_t x246 = _addcarryx_u64(0x0, x228, x230, &x245);
+ { uint64_t x248; uint8_t x249 = _addcarryx_u64(x246, x231, x233, &x248);
+ { uint64_t x251; uint8_t x252 = _addcarryx_u64(x249, x234, x236, &x251);
+ { uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
+ { uint64_t x260; uint8_t _ = _addcarryx_u64(0x0, x258, x243, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(0x0, x208, x227, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x211, x245, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x214, x248, &x269);
+ { uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x225, x260, &x281);
+ { uint64_t _; uint64_t x284 = _mulx_u64(x263, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x288; uint64_t x287 = _mulx_u64(x284, 0xfffffffffffffff7L, &x288);
+ { uint64_t x291; uint64_t x290 = _mulx_u64(x284, 0xffffffffffffffffL, &x291);
+ { uint64_t x294; uint64_t x293 = _mulx_u64(x284, 0xffffffffffffffffL, &x294);
+ { uint64_t x297; uint64_t x296 = _mulx_u64(x284, 0xffffffffffffffffL, &x297);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x284, 0xffffffffffffffffL, &x300);
+ { uint64_t x302; uint8_t x303 = _addcarryx_u64(0x0, x288, x290, &x302);
+ { uint64_t x305; uint8_t x306 = _addcarryx_u64(x303, x291, x293, &x305);
+ { uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x294, x296, &x308);
+ { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x297, x299, &x311);
+ { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x300, x284, &x314);
+ { uint64_t _; uint8_t x318 = _addcarryx_u64(0x0, x263, x287, &_);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x266, x302, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x269, x305, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x272, x308, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x275, x311, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x278, x314, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x281, x315, &x335);
+ { uint8_t x337 = (x336 + x282);
+ { uint64_t x340; uint64_t x339 = _mulx_u64(x11, x15, &x340);
+ { uint64_t x343; uint64_t x342 = _mulx_u64(x11, x17, &x343);
+ { uint64_t x346; uint64_t x345 = _mulx_u64(x11, x19, &x346);
+ { uint64_t x349; uint64_t x348 = _mulx_u64(x11, x21, &x349);
+ { uint64_t x352; uint64_t x351 = _mulx_u64(x11, x23, &x352);
+ { uint64_t x355; uint64_t x354 = _mulx_u64(x11, x22, &x355);
+ { uint64_t x357; uint8_t x358 = _addcarryx_u64(0x0, x340, x342, &x357);
+ { uint64_t x360; uint8_t x361 = _addcarryx_u64(x358, x343, x345, &x360);
+ { uint64_t x363; uint8_t x364 = _addcarryx_u64(x361, x346, x348, &x363);
+ { uint64_t x366; uint8_t x367 = _addcarryx_u64(x364, x349, x351, &x366);
+ { uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+ { uint64_t x372; uint8_t _ = _addcarryx_u64(0x0, x370, x355, &x372);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x320, x339, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x323, x357, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x326, x360, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x329, x363, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+ { uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x337, x372, &x393);
+ { uint64_t _; uint64_t x396 = _mulx_u64(x375, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x400; uint64_t x399 = _mulx_u64(x396, 0xfffffffffffffff7L, &x400);
+ { uint64_t x403; uint64_t x402 = _mulx_u64(x396, 0xffffffffffffffffL, &x403);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x396, 0xffffffffffffffffL, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x396, 0xffffffffffffffffL, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x396, 0xffffffffffffffffL, &x412);
+ { uint64_t x414; uint8_t x415 = _addcarryx_u64(0x0, x400, x402, &x414);
+ { uint64_t x417; uint8_t x418 = _addcarryx_u64(x415, x403, x405, &x417);
+ { uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x396, &x426);
+ { uint64_t _; uint8_t x430 = _addcarryx_u64(0x0, x375, x399, &_);
+ { uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x378, x414, &x432);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x381, x417, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x384, x420, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x387, x423, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x390, x426, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x393, x427, &x447);
+ { uint8_t x449 = (x448 + x394);
+ { uint64_t x452; uint64_t x451 = _mulx_u64(x13, x15, &x452);
+ { uint64_t x455; uint64_t x454 = _mulx_u64(x13, x17, &x455);
+ { uint64_t x458; uint64_t x457 = _mulx_u64(x13, x19, &x458);
+ { uint64_t x461; uint64_t x460 = _mulx_u64(x13, x21, &x461);
+ { uint64_t x464; uint64_t x463 = _mulx_u64(x13, x23, &x464);
+ { uint64_t x467; uint64_t x466 = _mulx_u64(x13, x22, &x467);
+ { uint64_t x469; uint8_t x470 = _addcarryx_u64(0x0, x452, x454, &x469);
+ { uint64_t x472; uint8_t x473 = _addcarryx_u64(x470, x455, x457, &x472);
+ { uint64_t x475; uint8_t x476 = _addcarryx_u64(x473, x458, x460, &x475);
+ { uint64_t x478; uint8_t x479 = _addcarryx_u64(x476, x461, x463, &x478);
+ { uint64_t x481; uint8_t x482 = _addcarryx_u64(x479, x464, x466, &x481);
+ { uint64_t x484; uint8_t _ = _addcarryx_u64(0x0, x482, x467, &x484);
+ { uint64_t x487; uint8_t x488 = _addcarryx_u64(0x0, x432, x451, &x487);
+ { uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x435, x469, &x490);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x438, x472, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x441, x475, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x444, x478, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x449, x484, &x505);
+ { uint64_t _; uint64_t x508 = _mulx_u64(x487, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x512; uint64_t x511 = _mulx_u64(x508, 0xfffffffffffffff7L, &x512);
+ { uint64_t x515; uint64_t x514 = _mulx_u64(x508, 0xffffffffffffffffL, &x515);
+ { uint64_t x518; uint64_t x517 = _mulx_u64(x508, 0xffffffffffffffffL, &x518);
+ { uint64_t x521; uint64_t x520 = _mulx_u64(x508, 0xffffffffffffffffL, &x521);
+ { uint64_t x524; uint64_t x523 = _mulx_u64(x508, 0xffffffffffffffffL, &x524);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(0x0, x512, x514, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x515, x517, &x529);
+ { uint64_t x532; uint8_t x533 = _addcarryx_u64(x530, x518, x520, &x532);
+ { uint64_t x535; uint8_t x536 = _addcarryx_u64(x533, x521, x523, &x535);
+ { uint64_t x538; uint8_t x539 = _addcarryx_u64(x536, x524, x508, &x538);
+ { uint64_t _; uint8_t x542 = _addcarryx_u64(0x0, x487, x511, &_);
+ { uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x490, x526, &x544);
+ { uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x493, x529, &x547);
+ { uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x496, x532, &x550);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(x551, x499, x535, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x502, x538, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x505, x539, &x559);
+ { uint8_t x561 = (x560 + x506);
+ { uint64_t x564; uint64_t x563 = _mulx_u64(x12, x15, &x564);
+ { uint64_t x567; uint64_t x566 = _mulx_u64(x12, x17, &x567);
+ { uint64_t x570; uint64_t x569 = _mulx_u64(x12, x19, &x570);
+ { uint64_t x573; uint64_t x572 = _mulx_u64(x12, x21, &x573);
+ { uint64_t x576; uint64_t x575 = _mulx_u64(x12, x23, &x576);
+ { uint64_t x579; uint64_t x578 = _mulx_u64(x12, x22, &x579);
+ { uint64_t x581; uint8_t x582 = _addcarryx_u64(0x0, x564, x566, &x581);
+ { uint64_t x584; uint8_t x585 = _addcarryx_u64(x582, x567, x569, &x584);
+ { uint64_t x587; uint8_t x588 = _addcarryx_u64(x585, x570, x572, &x587);
+ { uint64_t x590; uint8_t x591 = _addcarryx_u64(x588, x573, x575, &x590);
+ { uint64_t x593; uint8_t x594 = _addcarryx_u64(x591, x576, x578, &x593);
+ { uint64_t x596; uint8_t _ = _addcarryx_u64(0x0, x594, x579, &x596);
+ { uint64_t x599; uint8_t x600 = _addcarryx_u64(0x0, x544, x563, &x599);
+ { uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x547, x581, &x602);
+ { uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x550, x584, &x605);
+ { uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x553, x587, &x608);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(x609, x556, x590, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x559, x593, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x561, x596, &x617);
+ { uint64_t _; uint64_t x620 = _mulx_u64(x599, 0x8e38e38e38e38e39L, &_);
+ { uint64_t x624; uint64_t x623 = _mulx_u64(x620, 0xfffffffffffffff7L, &x624);
+ { uint64_t x627; uint64_t x626 = _mulx_u64(x620, 0xffffffffffffffffL, &x627);
+ { uint64_t x630; uint64_t x629 = _mulx_u64(x620, 0xffffffffffffffffL, &x630);
+ { uint64_t x633; uint64_t x632 = _mulx_u64(x620, 0xffffffffffffffffL, &x633);
+ { uint64_t x636; uint64_t x635 = _mulx_u64(x620, 0xffffffffffffffffL, &x636);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(0x0, x624, x626, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x627, x629, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x630, x632, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x633, x635, &x647);
+ { uint64_t x650; uint8_t x651 = _addcarryx_u64(x648, x636, x620, &x650);
+ { uint64_t _; uint8_t x654 = _addcarryx_u64(0x0, x599, x623, &_);
+ { uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x602, x638, &x656);
+ { uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x605, x641, &x659);
+ { uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x608, x644, &x662);
+ { uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x611, x647, &x665);
+ { uint64_t x668; uint8_t x669 = _addcarryx_u64(x666, x614, x650, &x668);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(x669, x617, x651, &x671);
+ { uint8_t x673 = (x672 + x618);
+ { uint64_t x675; uint8_t x676 = _subborrow_u64(0x0, x656, 0xfffffffffffffff7L, &x675);
+ { uint64_t x678; uint8_t x679 = _subborrow_u64(x676, x659, 0xffffffffffffffffL, &x678);
+ { uint64_t x681; uint8_t x682 = _subborrow_u64(x679, x662, 0xffffffffffffffffL, &x681);
+ { uint64_t x684; uint8_t x685 = _subborrow_u64(x682, x665, 0xffffffffffffffffL, &x684);
+ { uint64_t x687; uint8_t x688 = _subborrow_u64(x685, x668, 0xffffffffffffffffL, &x687);
+ { uint64_t x690; uint8_t x691 = _subborrow_u64(x688, x671, 0x1, &x690);
+ { uint64_t _; uint8_t x694 = _subborrow_u64(x691, x673, 0x0, &_);
+ { uint64_t x695 = cmovznz(x694, x690, x671);
+ { uint64_t x696 = cmovznz(x694, x687, x668);
+ { uint64_t x697 = cmovznz(x694, x684, x665);
+ { uint64_t x698 = cmovznz(x694, x681, x662);
+ { uint64_t x699 = cmovznz(x694, x678, x659);
+ { uint64_t x700 = cmovznz(x694, x675, x656);
+ out[0] = x700;
+ out[1] = x699;
+ out[2] = x698;
+ out[3] = x697;
+ out[4] = x696;
+ out[5] = x695;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e321m9/fenz.c b/src/Specific/montgomery64_2e321m9/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e321m9/fenz.c
+++ b/src/Specific/montgomery64_2e321m9/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e321m9/feopp.c b/src/Specific/montgomery64_2e321m9/feopp.c
index 41fcb70ec..cc56acb08 100644
--- a/src/Specific/montgomery64_2e321m9/feopp.c
+++ b/src/Specific/montgomery64_2e321m9/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xfffffffffffffff7L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint8_t x50 = ((uint8_t)x29 & 0x1);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xfffffffffffffff7L);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint8_t x50 = ((uint8_t)x29 & 0x1);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e321m9/fesub.c b/src/Specific/montgomery64_2e321m9/fesub.c
index 1e59f4d31..06b816e57 100644
--- a/src/Specific/montgomery64_2e321m9/fesub.c
+++ b/src/Specific/montgomery64_2e321m9/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xfffffffffffffff7L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint8_t x63 = ((uint8_t)x42 & 0x1);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xfffffffffffffff7L);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint8_t x63 = ((uint8_t)x42 & 0x1);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e322m2e161m1/feadd.c b/src/Specific/montgomery64_2e322m2e161m1/feadd.c
index 659abdb6a..4ea74fcaf 100644
--- a/src/Specific/montgomery64_2e322m2e161m1/feadd.c
+++ b/src/Specific/montgomery64_2e322m2e161m1/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xfffffffdffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xfffffffdffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e322m2e161m1/femul.c b/src/Specific/montgomery64_2e322m2e161m1/femul.c
index e156fe17f..2bca93d7b 100644
--- a/src/Specific/montgomery64_2e322m2e161m1/femul.c
+++ b/src/Specific/montgomery64_2e322m2e161m1/femul.c
@@ -1,41 +1,260 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xfffffffdffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
-out[0] = uint64_t x76;
-out[1] = uint8_t x77 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x25;
-out[2] = 0x3;;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xfffffffdffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
+ { uint64_t x76, uint8_t x77 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, 0x3);
+ { uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint8_t x93 = (x92 + x77);
+ { uint64_t _; uint8_t x96 = _addcarryx_u64(0x0, x25, x61, &_);
+ { uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x43, x79, &x98);
+ { uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x46, x82, &x101);
+ { uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x49, x85, &x104);
+ { uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x52, x88, &x107);
+ { uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x55, x91, &x110);
+ { uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x58, x93, &x113);
+ { uint64_t x117; uint64_t x116 = _mulx_u64(x7, x15, &x117);
+ { uint64_t x120; uint64_t x119 = _mulx_u64(x7, x17, &x120);
+ { uint64_t x123; uint64_t x122 = _mulx_u64(x7, x19, &x123);
+ { uint64_t x126; uint64_t x125 = _mulx_u64(x7, x21, &x126);
+ { uint64_t x129; uint64_t x128 = _mulx_u64(x7, x23, &x129);
+ { uint64_t x132; uint64_t x131 = _mulx_u64(x7, x22, &x132);
+ { uint64_t x134; uint8_t x135 = _addcarryx_u64(0x0, x117, x119, &x134);
+ { uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x120, x122, &x137);
+ { uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x123, x125, &x140);
+ { uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x126, x128, &x143);
+ { uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x129, x131, &x146);
+ { uint64_t x149; uint8_t _ = _addcarryx_u64(0x0, x147, x132, &x149);
+ { uint64_t x152; uint8_t x153 = _addcarryx_u64(0x0, x98, x116, &x152);
+ { uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x101, x134, &x155);
+ { uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x104, x137, &x158);
+ { uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x107, x140, &x161);
+ { uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x110, x143, &x164);
+ { uint64_t x167; uint8_t x168 = _addcarryx_u64(x165, x113, x146, &x167);
+ { uint64_t x170; uint8_t x171 = _addcarryx_u64(x168, x114, x149, &x170);
+ { uint64_t x174; uint64_t x173 = _mulx_u64(x152, 0xffffffffffffffffL, &x174);
+ { uint64_t x177; uint64_t x176 = _mulx_u64(x152, 0xffffffffffffffffL, &x177);
+ { uint64_t x180; uint64_t x179 = _mulx_u64(x152, 0xfffffffdffffffffL, &x180);
+ { uint64_t x183; uint64_t x182 = _mulx_u64(x152, 0xffffffffffffffffL, &x183);
+ { uint64_t x186; uint64_t x185 = _mulx_u64(x152, 0xffffffffffffffffL, &x186);
+ { uint64_t x188, uint8_t x189 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x152, 0x3);
+ { uint64_t x191; uint8_t x192 = _addcarryx_u64(0x0, x174, x176, &x191);
+ { uint64_t x194; uint8_t x195 = _addcarryx_u64(x192, x177, x179, &x194);
+ { uint64_t x197; uint8_t x198 = _addcarryx_u64(x195, x180, x182, &x197);
+ { uint64_t x200; uint8_t x201 = _addcarryx_u64(x198, x183, x185, &x200);
+ { uint64_t x203; uint8_t x204 = _addcarryx_u64(x201, x186, x188, &x203);
+ { uint8_t x205 = (x204 + x189);
+ { uint64_t _; uint8_t x208 = _addcarryx_u64(0x0, x152, x173, &_);
+ { uint64_t x210; uint8_t x211 = _addcarryx_u64(x208, x155, x191, &x210);
+ { uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x158, x194, &x213);
+ { uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x161, x197, &x216);
+ { uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x164, x200, &x219);
+ { uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x167, x203, &x222);
+ { uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x170, x205, &x225);
+ { uint8_t x227 = (x226 + x171);
+ { uint64_t x230; uint64_t x229 = _mulx_u64(x9, x15, &x230);
+ { uint64_t x233; uint64_t x232 = _mulx_u64(x9, x17, &x233);
+ { uint64_t x236; uint64_t x235 = _mulx_u64(x9, x19, &x236);
+ { uint64_t x239; uint64_t x238 = _mulx_u64(x9, x21, &x239);
+ { uint64_t x242; uint64_t x241 = _mulx_u64(x9, x23, &x242);
+ { uint64_t x245; uint64_t x244 = _mulx_u64(x9, x22, &x245);
+ { uint64_t x247; uint8_t x248 = _addcarryx_u64(0x0, x230, x232, &x247);
+ { uint64_t x250; uint8_t x251 = _addcarryx_u64(x248, x233, x235, &x250);
+ { uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x236, x238, &x253);
+ { uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x239, x241, &x256);
+ { uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x242, x244, &x259);
+ { uint64_t x262; uint8_t _ = _addcarryx_u64(0x0, x260, x245, &x262);
+ { uint64_t x265; uint8_t x266 = _addcarryx_u64(0x0, x210, x229, &x265);
+ { uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x213, x247, &x268);
+ { uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x216, x250, &x271);
+ { uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x219, x253, &x274);
+ { uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x222, x256, &x277);
+ { uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x225, x259, &x280);
+ { uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x227, x262, &x283);
+ { uint64_t x287; uint64_t x286 = _mulx_u64(x265, 0xffffffffffffffffL, &x287);
+ { uint64_t x290; uint64_t x289 = _mulx_u64(x265, 0xffffffffffffffffL, &x290);
+ { uint64_t x293; uint64_t x292 = _mulx_u64(x265, 0xfffffffdffffffffL, &x293);
+ { uint64_t x296; uint64_t x295 = _mulx_u64(x265, 0xffffffffffffffffL, &x296);
+ { uint64_t x299; uint64_t x298 = _mulx_u64(x265, 0xffffffffffffffffL, &x299);
+ { uint64_t x301, uint8_t x302 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x265, 0x3);
+ { uint64_t x304; uint8_t x305 = _addcarryx_u64(0x0, x287, x289, &x304);
+ { uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x290, x292, &x307);
+ { uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x293, x295, &x310);
+ { uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x296, x298, &x313);
+ { uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x299, x301, &x316);
+ { uint8_t x318 = (x317 + x302);
+ { uint64_t _; uint8_t x321 = _addcarryx_u64(0x0, x265, x286, &_);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x268, x304, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x271, x307, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x274, x310, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x277, x313, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x280, x316, &x335);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x283, x318, &x338);
+ { uint8_t x340 = (x339 + x284);
+ { uint64_t x343; uint64_t x342 = _mulx_u64(x11, x15, &x343);
+ { uint64_t x346; uint64_t x345 = _mulx_u64(x11, x17, &x346);
+ { uint64_t x349; uint64_t x348 = _mulx_u64(x11, x19, &x349);
+ { uint64_t x352; uint64_t x351 = _mulx_u64(x11, x21, &x352);
+ { uint64_t x355; uint64_t x354 = _mulx_u64(x11, x23, &x355);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x22, &x358);
+ { uint64_t x360; uint8_t x361 = _addcarryx_u64(0x0, x343, x345, &x360);
+ { uint64_t x363; uint8_t x364 = _addcarryx_u64(x361, x346, x348, &x363);
+ { uint64_t x366; uint8_t x367 = _addcarryx_u64(x364, x349, x351, &x366);
+ { uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+ { uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
+ { uint64_t x375; uint8_t _ = _addcarryx_u64(0x0, x373, x358, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(0x0, x323, x342, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x326, x360, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x329, x363, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+ { uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x340, x375, &x396);
+ { uint64_t x400; uint64_t x399 = _mulx_u64(x378, 0xffffffffffffffffL, &x400);
+ { uint64_t x403; uint64_t x402 = _mulx_u64(x378, 0xffffffffffffffffL, &x403);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x378, 0xfffffffdffffffffL, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x378, 0xffffffffffffffffL, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x378, 0xffffffffffffffffL, &x412);
+ { uint64_t x414, uint8_t x415 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x378, 0x3);
+ { uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x400, x402, &x417);
+ { uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x403, x405, &x420);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x406, x408, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
+ { uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
+ { uint8_t x431 = (x430 + x415);
+ { uint64_t _; uint8_t x434 = _addcarryx_u64(0x0, x378, x399, &_);
+ { uint64_t x436; uint8_t x437 = _addcarryx_u64(x434, x381, x417, &x436);
+ { uint64_t x439; uint8_t x440 = _addcarryx_u64(x437, x384, x420, &x439);
+ { uint64_t x442; uint8_t x443 = _addcarryx_u64(x440, x387, x423, &x442);
+ { uint64_t x445; uint8_t x446 = _addcarryx_u64(x443, x390, x426, &x445);
+ { uint64_t x448; uint8_t x449 = _addcarryx_u64(x446, x393, x429, &x448);
+ { uint64_t x451; uint8_t x452 = _addcarryx_u64(x449, x396, x431, &x451);
+ { uint8_t x453 = (x452 + x397);
+ { uint64_t x456; uint64_t x455 = _mulx_u64(x13, x15, &x456);
+ { uint64_t x459; uint64_t x458 = _mulx_u64(x13, x17, &x459);
+ { uint64_t x462; uint64_t x461 = _mulx_u64(x13, x19, &x462);
+ { uint64_t x465; uint64_t x464 = _mulx_u64(x13, x21, &x465);
+ { uint64_t x468; uint64_t x467 = _mulx_u64(x13, x23, &x468);
+ { uint64_t x471; uint64_t x470 = _mulx_u64(x13, x22, &x471);
+ { uint64_t x473; uint8_t x474 = _addcarryx_u64(0x0, x456, x458, &x473);
+ { uint64_t x476; uint8_t x477 = _addcarryx_u64(x474, x459, x461, &x476);
+ { uint64_t x479; uint8_t x480 = _addcarryx_u64(x477, x462, x464, &x479);
+ { uint64_t x482; uint8_t x483 = _addcarryx_u64(x480, x465, x467, &x482);
+ { uint64_t x485; uint8_t x486 = _addcarryx_u64(x483, x468, x470, &x485);
+ { uint64_t x488; uint8_t _ = _addcarryx_u64(0x0, x486, x471, &x488);
+ { uint64_t x491; uint8_t x492 = _addcarryx_u64(0x0, x436, x455, &x491);
+ { uint64_t x494; uint8_t x495 = _addcarryx_u64(x492, x439, x473, &x494);
+ { uint64_t x497; uint8_t x498 = _addcarryx_u64(x495, x442, x476, &x497);
+ { uint64_t x500; uint8_t x501 = _addcarryx_u64(x498, x445, x479, &x500);
+ { uint64_t x503; uint8_t x504 = _addcarryx_u64(x501, x448, x482, &x503);
+ { uint64_t x506; uint8_t x507 = _addcarryx_u64(x504, x451, x485, &x506);
+ { uint64_t x509; uint8_t x510 = _addcarryx_u64(x507, x453, x488, &x509);
+ { uint64_t x513; uint64_t x512 = _mulx_u64(x491, 0xffffffffffffffffL, &x513);
+ { uint64_t x516; uint64_t x515 = _mulx_u64(x491, 0xffffffffffffffffL, &x516);
+ { uint64_t x519; uint64_t x518 = _mulx_u64(x491, 0xfffffffdffffffffL, &x519);
+ { uint64_t x522; uint64_t x521 = _mulx_u64(x491, 0xffffffffffffffffL, &x522);
+ { uint64_t x525; uint64_t x524 = _mulx_u64(x491, 0xffffffffffffffffL, &x525);
+ { uint64_t x527, uint8_t x528 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x491, 0x3);
+ { uint64_t x530; uint8_t x531 = _addcarryx_u64(0x0, x513, x515, &x530);
+ { uint64_t x533; uint8_t x534 = _addcarryx_u64(x531, x516, x518, &x533);
+ { uint64_t x536; uint8_t x537 = _addcarryx_u64(x534, x519, x521, &x536);
+ { uint64_t x539; uint8_t x540 = _addcarryx_u64(x537, x522, x524, &x539);
+ { uint64_t x542; uint8_t x543 = _addcarryx_u64(x540, x525, x527, &x542);
+ { uint8_t x544 = (x543 + x528);
+ { uint64_t _; uint8_t x547 = _addcarryx_u64(0x0, x491, x512, &_);
+ { uint64_t x549; uint8_t x550 = _addcarryx_u64(x547, x494, x530, &x549);
+ { uint64_t x552; uint8_t x553 = _addcarryx_u64(x550, x497, x533, &x552);
+ { uint64_t x555; uint8_t x556 = _addcarryx_u64(x553, x500, x536, &x555);
+ { uint64_t x558; uint8_t x559 = _addcarryx_u64(x556, x503, x539, &x558);
+ { uint64_t x561; uint8_t x562 = _addcarryx_u64(x559, x506, x542, &x561);
+ { uint64_t x564; uint8_t x565 = _addcarryx_u64(x562, x509, x544, &x564);
+ { uint8_t x566 = (x565 + x510);
+ { uint64_t x569; uint64_t x568 = _mulx_u64(x12, x15, &x569);
+ { uint64_t x572; uint64_t x571 = _mulx_u64(x12, x17, &x572);
+ { uint64_t x575; uint64_t x574 = _mulx_u64(x12, x19, &x575);
+ { uint64_t x578; uint64_t x577 = _mulx_u64(x12, x21, &x578);
+ { uint64_t x581; uint64_t x580 = _mulx_u64(x12, x23, &x581);
+ { uint64_t x584; uint64_t x583 = _mulx_u64(x12, x22, &x584);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(0x0, x569, x571, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x572, x574, &x589);
+ { uint64_t x592; uint8_t x593 = _addcarryx_u64(x590, x575, x577, &x592);
+ { uint64_t x595; uint8_t x596 = _addcarryx_u64(x593, x578, x580, &x595);
+ { uint64_t x598; uint8_t x599 = _addcarryx_u64(x596, x581, x583, &x598);
+ { uint64_t x601; uint8_t _ = _addcarryx_u64(0x0, x599, x584, &x601);
+ { uint64_t x604; uint8_t x605 = _addcarryx_u64(0x0, x549, x568, &x604);
+ { uint64_t x607; uint8_t x608 = _addcarryx_u64(x605, x552, x586, &x607);
+ { uint64_t x610; uint8_t x611 = _addcarryx_u64(x608, x555, x589, &x610);
+ { uint64_t x613; uint8_t x614 = _addcarryx_u64(x611, x558, x592, &x613);
+ { uint64_t x616; uint8_t x617 = _addcarryx_u64(x614, x561, x595, &x616);
+ { uint64_t x619; uint8_t x620 = _addcarryx_u64(x617, x564, x598, &x619);
+ { uint64_t x622; uint8_t x623 = _addcarryx_u64(x620, x566, x601, &x622);
+ { uint64_t x626; uint64_t x625 = _mulx_u64(x604, 0xffffffffffffffffL, &x626);
+ { uint64_t x629; uint64_t x628 = _mulx_u64(x604, 0xffffffffffffffffL, &x629);
+ { uint64_t x632; uint64_t x631 = _mulx_u64(x604, 0xfffffffdffffffffL, &x632);
+ { uint64_t x635; uint64_t x634 = _mulx_u64(x604, 0xffffffffffffffffL, &x635);
+ { uint64_t x638; uint64_t x637 = _mulx_u64(x604, 0xffffffffffffffffL, &x638);
+ { uint64_t x640, uint8_t x641 = Op (Syntax.MulSplit 64 (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x604, 0x3);
+ { uint64_t x643; uint8_t x644 = _addcarryx_u64(0x0, x626, x628, &x643);
+ { uint64_t x646; uint8_t x647 = _addcarryx_u64(x644, x629, x631, &x646);
+ { uint64_t x649; uint8_t x650 = _addcarryx_u64(x647, x632, x634, &x649);
+ { uint64_t x652; uint8_t x653 = _addcarryx_u64(x650, x635, x637, &x652);
+ { uint64_t x655; uint8_t x656 = _addcarryx_u64(x653, x638, x640, &x655);
+ { uint8_t x657 = (x656 + x641);
+ { uint64_t _; uint8_t x660 = _addcarryx_u64(0x0, x604, x625, &_);
+ { uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x607, x643, &x662);
+ { uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x610, x646, &x665);
+ { uint64_t x668; uint8_t x669 = _addcarryx_u64(x666, x613, x649, &x668);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(x669, x616, x652, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x619, x655, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x622, x657, &x677);
+ { uint8_t x679 = (x678 + x623);
+ { uint64_t x681; uint8_t x682 = _subborrow_u64(0x0, x662, 0xffffffffffffffffL, &x681);
+ { uint64_t x684; uint8_t x685 = _subborrow_u64(x682, x665, 0xffffffffffffffffL, &x684);
+ { uint64_t x687; uint8_t x688 = _subborrow_u64(x685, x668, 0xfffffffdffffffffL, &x687);
+ { uint64_t x690; uint8_t x691 = _subborrow_u64(x688, x671, 0xffffffffffffffffL, &x690);
+ { uint64_t x693; uint8_t x694 = _subborrow_u64(x691, x674, 0xffffffffffffffffL, &x693);
+ { uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0x3, &x696);
+ { uint64_t _; uint8_t x700 = _subborrow_u64(x697, x679, 0x0, &_);
+ { uint64_t x701 = cmovznz(x700, x696, x677);
+ { uint64_t x702 = cmovznz(x700, x693, x674);
+ { uint64_t x703 = cmovznz(x700, x690, x671);
+ { uint64_t x704 = cmovznz(x700, x687, x668);
+ { uint64_t x705 = cmovznz(x700, x684, x665);
+ { uint64_t x706 = cmovznz(x700, x681, x662);
+ out[0] = x706;
+ out[1] = x705;
+ out[2] = x704;
+ out[3] = x703;
+ out[4] = x702;
+ out[5] = x701;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e322m2e161m1/fenz.c b/src/Specific/montgomery64_2e322m2e161m1/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e322m2e161m1/fenz.c
+++ b/src/Specific/montgomery64_2e322m2e161m1/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e322m2e161m1/feopp.c b/src/Specific/montgomery64_2e322m2e161m1/feopp.c
index f5026da1d..20fac0a32 100644
--- a/src/Specific/montgomery64_2e322m2e161m1/feopp.c
+++ b/src/Specific/montgomery64_2e322m2e161m1/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xfffffffdffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint8_t x50 = ((uint8_t)x29 & 0x3);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xfffffffdffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint8_t x50 = ((uint8_t)x29 & 0x3);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e322m2e161m1/fesub.c b/src/Specific/montgomery64_2e322m2e161m1/fesub.c
index e8818feb1..82c028b3d 100644
--- a/src/Specific/montgomery64_2e322m2e161m1/fesub.c
+++ b/src/Specific/montgomery64_2e322m2e161m1/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xfffffffdffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint8_t x63 = ((uint8_t)x42 & 0x3);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xfffffffdffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint8_t x63 = ((uint8_t)x42 & 0x3);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m17/feadd.c b/src/Specific/montgomery64_2e336m17/feadd.c
index 62ecf8d23..e98ee655f 100644
--- a/src/Specific/montgomery64_2e336m17/feadd.c
+++ b/src/Specific/montgomery64_2e336m17/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffefL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffff, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // Six-limb addition in1 + in2 (index 0 is the least-significant 64-bit limb), then a trial subtraction of 2^336 - 17; each output limb is chosen by cmovznz on the trial's final borrow.
+ { const uint64_t x12 = in1[5]; // in1 limb 5: last limb consumed by the carry chain below, i.e. most significant
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // in1 limb 0: first limb consumed by the carry chain, i.e. least significant
+ { const uint64_t x22 = in2[5]; // in2 limbs are loaded in the same 5..0 order as in1
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // limb 0, no carry in: x25 = low 64 bits of x5 + x15, x26 = carry out
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28); // limb 1, consumes carry x26
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31); // limb 2, consumes carry x29
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34); // limb 3, consumes carry x32
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37); // limb 4, consumes carry x35
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // limb 5; x41 is the carry out of the whole six-limb sum
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffefL, &x43); // trial subtraction limb 0: the constant is 2^64 - 17
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46); // trial subtraction limb 1 (constant limb all ones), consumes borrow x44
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49); // trial subtraction limb 2, consumes borrow x47
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52); // trial subtraction limb 3, consumes borrow x50
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55); // trial subtraction limb 4, consumes borrow x53
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffff, &x58); // trial subtraction limb 5: constant 2^16 - 1, so the six constants together encode 2^336 - 17
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // folds the addition carry x41 into the borrow chain; only the borrow x62 is kept, the difference goes to the unused `_`
+ { uint64_t x63 = cmovznz(x62, x58, x40); // limb 5: picks between trial difference x58 and raw sum x40 -- presumably x58 when x62 == 0 (cmovznz is defined outside this block; confirm)
+ { uint64_t x64 = cmovznz(x62, x55, x37); // limb 4: same selection between x55 and x37
+ { uint64_t x65 = cmovznz(x62, x52, x34); // limb 3: same selection between x52 and x34
+ { uint64_t x66 = cmovznz(x62, x49, x31); // limb 2: same selection between x49 and x31
+ { uint64_t x67 = cmovznz(x62, x46, x28); // limb 1: same selection between x46 and x28
+ { uint64_t x68 = cmovznz(x62, x43, x25); // limb 0: same selection between x43 and x25
+ out[0] = x68; // limb 0 (least significant), same limb order as in1/in2
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63; // limb 5 (most significant)
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m17/femul.c b/src/Specific/montgomery64_2e336m17/femul.c
index e55a79c42..d6093aeec 100644
--- a/src/Specific/montgomery64_2e336m17/femul.c
+++ b/src/Specific/montgomery64_2e336m17/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffefL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffff, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffefL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffff, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffefL, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffff, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffefL, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffff, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffefL, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffff, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xf0f0f0f0f0f0f0f1L, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffefL, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffff, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffefL, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffff, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffefL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffff, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffefL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffff, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffefL, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffff, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffefL, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffff, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffefL, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffff, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xf0f0f0f0f0f0f0f1L, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffefL, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffff, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffefL, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffff, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m17/fenz.c b/src/Specific/montgomery64_2e336m17/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e336m17/fenz.c
+++ b/src/Specific/montgomery64_2e336m17/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m17/feopp.c b/src/Specific/montgomery64_2e336m17/feopp.c
index a0d10433e..8b88c3d4b 100644
--- a/src/Specific/montgomery64_2e336m17/feopp.c
+++ b/src/Specific/montgomery64_2e336m17/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffefL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0xffff);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffefL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0xffff);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m17/fesub.c b/src/Specific/montgomery64_2e336m17/fesub.c
index c7c9d3a78..01ec66a18 100644
--- a/src/Specific/montgomery64_2e336m17/fesub.c
+++ b/src/Specific/montgomery64_2e336m17/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffefL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0xffff);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffffefL);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0xffff);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m3/feadd.c b/src/Specific/montgomery64_2e336m3/feadd.c
index 96ab32ae5..3d4dd3146 100644
--- a/src/Specific/montgomery64_2e336m3/feadd.c
+++ b/src/Specific/montgomery64_2e336m3/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffffdL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffff, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffffdL, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffff, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m3/femul.c b/src/Specific/montgomery64_2e336m3/femul.c
index 4184be0e3..3b4e8ef84 100644
--- a/src/Specific/montgomery64_2e336m3/femul.c
+++ b/src/Specific/montgomery64_2e336m3/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffffdL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffff, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffffdL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffff, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffffdL, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffff, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffffdL, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffff, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffffdL, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffff, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xaaaaaaaaaaaaaaabL, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffffdL, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffff, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffffdL, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffff, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffffdL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffff, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffffdL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffff, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffffdL, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffff, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffffdL, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffff, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffffdL, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffff, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xaaaaaaaaaaaaaaabL, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffffdL, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffff, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffffdL, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffff, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m3/fenz.c b/src/Specific/montgomery64_2e336m3/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e336m3/fenz.c
+++ b/src/Specific/montgomery64_2e336m3/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m3/feopp.c b/src/Specific/montgomery64_2e336m3/feopp.c
index 0bfd80774..08c26ff7b 100644
--- a/src/Specific/montgomery64_2e336m3/feopp.c
+++ b/src/Specific/montgomery64_2e336m3/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xfffffffffffffffdL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0xffff);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xfffffffffffffffdL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0xffff);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e336m3/fesub.c b/src/Specific/montgomery64_2e336m3/fesub.c
index 268ea9437..bc6f89b2a 100644
--- a/src/Specific/montgomery64_2e336m3/fesub.c
+++ b/src/Specific/montgomery64_2e336m3/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xfffffffffffffffdL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0xffff);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xfffffffffffffffdL);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0xffff);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e338m15/feadd.c b/src/Specific/montgomery64_2e338m15/feadd.c
index 634312cc1..2c9c7ee9c 100644
--- a/src/Specific/montgomery64_2e338m15/feadd.c
+++ b/src/Specific/montgomery64_2e338m15/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffff1L, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3ffff, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffff1L, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3ffff, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e338m15/femul.c b/src/Specific/montgomery64_2e338m15/femul.c
index 2d8f35d6c..e1d191853 100644
--- a/src/Specific/montgomery64_2e338m15/femul.c
+++ b/src/Specific/montgomery64_2e338m15/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffff1L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x3ffff, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffff1L, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x3ffff, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffff1L, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x3ffff, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffff1L, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x3ffff, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffff1L, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x3ffff, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xeeeeeeeeeeeeeeefL, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffff1L, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x3ffff, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffff1L, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x3ffff, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffff1L, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x3ffff, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffff1L, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x3ffff, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffff1L, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x3ffff, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffff1L, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x3ffff, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffff1L, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x3ffff, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xeeeeeeeeeeeeeeefL, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffff1L, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x3ffff, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffff1L, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x3ffff, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e338m15/fenz.c b/src/Specific/montgomery64_2e338m15/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e338m15/fenz.c
+++ b/src/Specific/montgomery64_2e338m15/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) { // Nonzero test: out[0] receives the bitwise OR of in1[0..5], so it is 0 exactly when all six words are 0. NOTE(review): `ReturnType` is not defined in this file; presumably a macro supplied by whatever includes it - confirm.
+ { const uint64_t x9 = in1[5]; // unpack the six input words into locals
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9); // accumulate the OR one word at a time, from in1[5] down to in1[0]
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14); // x15 = OR of all six words
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e338m15/feopp.c b/src/Specific/montgomery64_2e338m15/feopp.c
index e088984dc..6f189861b 100644
--- a/src/Specific/montgomery64_2e338m15/feopp.c
+++ b/src/Specific/montgomery64_2e338m15/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xfffffffffffffff1L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x3ffff);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) { // Negation: computes 0 - in1 over six 64-bit words (index 0 is where the borrow chain starts), then adds back a masked constant and stores the six result words in out[0..5].
+ { const uint64_t x9 = in1[5]; // unpack the six input words into locals
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // 0 - in1[0], borrow-in 0; x13 is the borrow handed to the next word
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 = borrow out of the top word
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz is defined elsewhere; presumably yields all-ones when x28 is nonzero and 0 otherwise - confirm
+ { uint64_t x30 = (x29 & 0xfffffffffffffff1L); // masked word 0 of the add-back constant; the six constants together are the words of 2^338 - 15 (cf. the montgomery64_2e338m15 directory name)
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // add-back carry chain starts here with carry-in 0
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x3ffff); // masked top word of the constant
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // carry out of the top word is deliberately discarded into `_`
+ out[0] = x32; // results are stored in the same word order as the input (out[i] pairs with in1[i])
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e338m15/fesub.c b/src/Specific/montgomery64_2e338m15/fesub.c
index c20f02de3..112827e5f 100644
--- a/src/Specific/montgomery64_2e338m15/fesub.c
+++ b/src/Specific/montgomery64_2e338m15/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xfffffffffffffff1L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x3ffff);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // Subtraction: computes in1 - in2 over six 64-bit words (index 0 is where the borrow chain starts), then adds back a masked constant and stores the six result words in out[0..5].
+ { const uint64_t x12 = in1[5]; // unpack the minuend words
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5]; // unpack the subtrahend words
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // in1[0] - in2[0], borrow-in 0; x26 is the borrow handed to the next word
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 = borrow out of the top word
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz is defined elsewhere; presumably yields all-ones when x41 is nonzero and 0 otherwise - confirm
+ { uint64_t x43 = (x42 & 0xfffffffffffffff1L); // masked word 0 of the add-back constant; the six constants together are the words of 2^338 - 15 (cf. the montgomery64_2e338m15 directory name)
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add-back carry chain starts here with carry-in 0
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x3ffff); // masked top word of the constant
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // carry out of the top word is deliberately discarded into `_`
+ out[0] = x45; // results are stored in the same word order as the inputs (out[i] pairs with in1[i], in2[i])
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e369m25/feadd.c b/src/Specific/montgomery64_2e369m25/feadd.c
index 20f02c39e..589dda723 100644
--- a/src/Specific/montgomery64_2e369m25/feadd.c
+++ b/src/Specific/montgomery64_2e369m25/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffe7L, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x1ffffffffffff, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // Addition: computes in1 + in2 over six 64-bit words (index 0 is where the carry chain starts), also computes that sum minus a fixed constant, and stores one of the two candidates word-by-word in out[0..5].
+ { const uint64_t x12 = in1[5]; // unpack the first addend
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5]; // unpack the second addend
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // in1[0] + in2[0], carry-in 0; x26 is the carry handed to the next word
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 = carry out of the top word, kept as a seventh (overflow) word
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffe7L, &x43); // trial subtraction of the constant whose words are 2^369 - 25 (cf. the montgomery64_2e369m25 directory name)
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x1ffffffffffff, &x58); // top word of the constant
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // propagate the borrow through the overflow word; only the final borrow x62 is kept
+ { uint64_t x63 = cmovznz(x62, x58, x40); // NOTE(review): cmovznz is defined elsewhere; presumably picks the unsubtracted sum word (3rd arg) when x62 is nonzero and the subtracted word (2nd arg) otherwise - confirm
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68; // x68 is the word-0 selection, so out[i] pairs with in1[i], in2[i]
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e369m25/femul.c b/src/Specific/montgomery64_2e369m25/femul.c
index 9ce0556eb..1b98b998d 100644
--- a/src/Specific/montgomery64_2e369m25/femul.c
+++ b/src/Specific/montgomery64_2e369m25/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffe7L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x1ffffffffffff, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffe7L, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x1ffffffffffff, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffe7L, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x1ffffffffffff, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffe7L, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x1ffffffffffff, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffe7L, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x1ffffffffffff, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8f5c28f5c28f5c29L, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffe7L, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x1ffffffffffff, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffe7L, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x1ffffffffffff, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffe7L, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x1ffffffffffff, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffe7L, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x1ffffffffffff, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffe7L, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x1ffffffffffff, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffe7L, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x1ffffffffffff, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffe7L, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x1ffffffffffff, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8f5c28f5c28f5c29L, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffe7L, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x1ffffffffffff, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffe7L, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x1ffffffffffff, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e369m25/fenz.c b/src/Specific/montgomery64_2e369m25/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e369m25/fenz.c
+++ b/src/Specific/montgomery64_2e369m25/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e369m25/feopp.c b/src/Specific/montgomery64_2e369m25/feopp.c
index c77996a8b..c6fcdfc69 100644
--- a/src/Specific/montgomery64_2e369m25/feopp.c
+++ b/src/Specific/montgomery64_2e369m25/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffe7L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x1ffffffffffff);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffe7L);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x1ffffffffffff);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e369m25/fesub.c b/src/Specific/montgomery64_2e369m25/fesub.c
index 392994d17..9687d81d4 100644
--- a/src/Specific/montgomery64_2e369m25/fesub.c
+++ b/src/Specific/montgomery64_2e369m25/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffe7L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x1ffffffffffff);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffffe7L);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x1ffffffffffff);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e379m19/feadd.c b/src/Specific/montgomery64_2e379m19/feadd.c
index 7e29347b3..8b05ac62c 100644
--- a/src/Specific/montgomery64_2e379m19/feadd.c
+++ b/src/Specific/montgomery64_2e379m19/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffedL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7ffffffffffffff, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffedL, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7ffffffffffffff, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e379m19/femul.c b/src/Specific/montgomery64_2e379m19/femul.c
index d641da6dc..8891798d8 100644
--- a/src/Specific/montgomery64_2e379m19/femul.c
+++ b/src/Specific/montgomery64_2e379m19/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x86bca1af286bca1bL, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffedL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7ffffffffffffff, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x86bca1af286bca1bL, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffedL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7ffffffffffffff, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x86bca1af286bca1bL, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffedL, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7ffffffffffffff, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x86bca1af286bca1bL, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffedL, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7ffffffffffffff, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x86bca1af286bca1bL, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffedL, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7ffffffffffffff, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x86bca1af286bca1bL, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffedL, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7ffffffffffffff, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffedL, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7ffffffffffffff, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x86bca1af286bca1bL, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffedL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7ffffffffffffff, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x86bca1af286bca1bL, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffedL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7ffffffffffffff, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x86bca1af286bca1bL, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffedL, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7ffffffffffffff, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x86bca1af286bca1bL, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffedL, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7ffffffffffffff, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x86bca1af286bca1bL, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffedL, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7ffffffffffffff, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x86bca1af286bca1bL, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffedL, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7ffffffffffffff, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffedL, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7ffffffffffffff, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e379m19/fenz.c b/src/Specific/montgomery64_2e379m19/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e379m19/fenz.c
+++ b/src/Specific/montgomery64_2e379m19/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e379m19/feopp.c b/src/Specific/montgomery64_2e379m19/feopp.c
index 63e2ab312..dc755a17b 100644
--- a/src/Specific/montgomery64_2e379m19/feopp.c
+++ b/src/Specific/montgomery64_2e379m19/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffedL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x7ffffffffffffff);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffedL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x7ffffffffffffff);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e379m19/fesub.c b/src/Specific/montgomery64_2e379m19/fesub.c
index 8f8ef0681..e2fa7c05d 100644
--- a/src/Specific/montgomery64_2e379m19/fesub.c
+++ b/src/Specific/montgomery64_2e379m19/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffedL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x7ffffffffffffff);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffffedL);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x7ffffffffffffff);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e382m105/feadd.c b/src/Specific/montgomery64_2e382m105/feadd.c
index d2d1c0656..634d11f6c 100644
--- a/src/Specific/montgomery64_2e382m105/feadd.c
+++ b/src/Specific/montgomery64_2e382m105/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffff97L, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3fffffffffffffff, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffff97L, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3fffffffffffffff, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e382m105/femul.c b/src/Specific/montgomery64_2e382m105/femul.c
index dd2961da5..27ccec503 100644
--- a/src/Specific/montgomery64_2e382m105/femul.c
+++ b/src/Specific/montgomery64_2e382m105/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8fd8fd8fd8fd8fd9L, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffff97L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x3fffffffffffffff, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8fd8fd8fd8fd8fd9L, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffff97L, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x3fffffffffffffff, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8fd8fd8fd8fd8fd9L, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffff97L, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x3fffffffffffffff, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8fd8fd8fd8fd8fd9L, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffff97L, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x3fffffffffffffff, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8fd8fd8fd8fd8fd9L, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffff97L, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x3fffffffffffffff, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8fd8fd8fd8fd8fd9L, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffff97L, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x3fffffffffffffff, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffff97L, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x3fffffffffffffff, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8fd8fd8fd8fd8fd9L, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffff97L, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x3fffffffffffffff, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8fd8fd8fd8fd8fd9L, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffff97L, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x3fffffffffffffff, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8fd8fd8fd8fd8fd9L, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffff97L, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x3fffffffffffffff, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8fd8fd8fd8fd8fd9L, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffff97L, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x3fffffffffffffff, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8fd8fd8fd8fd8fd9L, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffff97L, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x3fffffffffffffff, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8fd8fd8fd8fd8fd9L, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffff97L, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x3fffffffffffffff, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffff97L, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x3fffffffffffffff, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e382m105/fenz.c b/src/Specific/montgomery64_2e382m105/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e382m105/fenz.c
+++ b/src/Specific/montgomery64_2e382m105/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e382m105/feopp.c b/src/Specific/montgomery64_2e382m105/feopp.c
index 611e0b97e..135d1d97d 100644
--- a/src/Specific/montgomery64_2e382m105/feopp.c
+++ b/src/Specific/montgomery64_2e382m105/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffff97L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x3fffffffffffffff);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffff97L);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x3fffffffffffffff);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e382m105/fesub.c b/src/Specific/montgomery64_2e382m105/fesub.c
index 2f1cd8bb4..75b1e7459 100644
--- a/src/Specific/montgomery64_2e382m105/fesub.c
+++ b/src/Specific/montgomery64_2e382m105/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffff97L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x3fffffffffffffff);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffff97L);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x3fffffffffffffff);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m187/feadd.c b/src/Specific/montgomery64_2e383m187/feadd.c
index fb1dff18e..3e2f7aa4d 100644
--- a/src/Specific/montgomery64_2e383m187/feadd.c
+++ b/src/Specific/montgomery64_2e383m187/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffff45L, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffff45L, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m187/femul.c b/src/Specific/montgomery64_2e383m187/femul.c
index ab1b51c11..81e5105da 100644
--- a/src/Specific/montgomery64_2e383m187/femul.c
+++ b/src/Specific/montgomery64_2e383m187/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8a4472fea18a4473L, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffff45L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8a4472fea18a4473L, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffff45L, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8a4472fea18a4473L, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffff45L, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8a4472fea18a4473L, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffff45L, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8a4472fea18a4473L, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffff45L, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8a4472fea18a4473L, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffff45L, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffff45L, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // Fills out[0..5] from in1[0..5] and in2[0..5]: six multiply-accumulate-and-reduce rounds (one per word of in1) followed by one conditional subtraction. NOTE(review): presumably Montgomery multiplication modulo 2^383 - 187, going by the montgomery64_2e383m187 path of the neighbouring files -- confirm.
+ { const uint64_t x12 = in1[5]; // the words of in1 are loaded from index 5 down to index 0
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5]; // the words of in2, loaded in the same order
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 (= in1[0]) times every word of in2; per the Intel intrinsic, the low half is returned and the high half is stored through the pointer
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // carry chain: each high half is added to the low half of the next product
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // top word: last high half plus the final carry x56; the carry-out is discarded into _
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8a4472fea18a4473L, &_); // x61 = low half of x25 times the constant; the high half is discarded. NOTE(review): the constant looks like the inverse of 187 modulo 2^64 (its low 16 bits check out) -- confirm
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffff45L, &x65); // x61 times the six constant words 0xffffffffffffff45, 0xffffffffffffffff (four times), 0x7fffffffffffffff; taken in this order from the low end they spell 2^383 - 187
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // add that multiple to the running sum; the low result word is discarded into _ and only its carry x101 is kept
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2: x7 (= in1[1]) times every word of in2
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // accumulate the round-2 products onto the words left by round 1
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8a4472fea18a4473L, &_); // reduction step for round 2, same shape as in round 1
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffff45L, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176); // sum of this round's two carry-outs, fed into the next round as an extra top word
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3: x9 (= in1[2]) times every word of in2
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8a4472fea18a4473L, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffff45L, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4: x11 (= in1[3]) times every word of in2
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8a4472fea18a4473L, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffff45L, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5: x13 (= in1[4]) times every word of in2
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8a4472fea18a4473L, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffff45L, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6: x12 (= in1[5]) times every word of in2
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8a4472fea18a4473L, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffff45L, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffff45L, &x711); // final step: subtract the same six constant words from x692..x707 with a borrow chain
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // run the borrow through the carry word x709; only the final borrow x730 is kept
+ { uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz selects, per word, between the subtracted and the unsubtracted value according to x730. NOTE(review): cmovznz is defined outside this hunk; which argument it returns for a zero flag is not visible here -- confirm
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736; // x736 comes from the x711/x692 pair at the start of the chains and x731 from the x726/x707 pair at the end, so out[] is filled in chain order
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m187/fenz.c b/src/Specific/montgomery64_2e383m187/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e383m187/fenz.c
+++ b/src/Specific/montgomery64_2e383m187/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(uint64_t out[1], const uint64_t in1[6]) { // ORs the six 64-bit words of in1 together and stores the result in out[0], so out[0] is nonzero exactly when at least one word of in1 is nonzero. The stray "ReturnType" token that preceded the first parameter type was dropped: it is not a C declaration specifier, and the femul and feopp signatures in this patch do not carry it.
+ { const uint64_t x9 = in1[5]; // the words of in1 are loaded from index 5 down to index 0
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9); // fold the words together one at a time; no branch is taken on any of them
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15; // bitwise OR of all six input words
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m187/feopp.c b/src/Specific/montgomery64_2e383m187/feopp.c
index 9979ec8e7..b8588ce63 100644
--- a/src/Specific/montgomery64_2e383m187/feopp.c
+++ b/src/Specific/montgomery64_2e383m187/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffff45L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x7fffffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) { // Fills out[0..5] with (0 - in1) computed over six 64-bit words, then adds the constant words back under a mask derived from the final borrow. NOTE(review): presumably negation modulo 2^383 - 187, going by the montgomery64_2e383m187 path of this file -- confirm.
+ { const uint64_t x9 = in1[5]; // the words of in1 are loaded from index 5 down to index 0
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // 0 - in1, word by word starting at in1[0], with the borrow chained through x13..x28
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask built from the final borrow x28. NOTE(review): presumably all-ones exactly when a borrow occurred and zero otherwise; cmovznz is defined outside this hunk -- confirm
+ { uint64_t x30 = (x29 & 0xffffffffffffff45L); // each constant word (0xffffffffffffff45, 0xffffffffffffffff four times, 0x7fffffffffffffff) is ANDed with the mask and added in with a carry chain
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x7fffffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the last carry-out is discarded into _
+ out[0] = x32; // out[i] receives the word derived from in1[i]: x32 comes from x2 = in1[0], x52 from x9 = in1[5]
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m187/fesub.c b/src/Specific/montgomery64_2e383m187/fesub.c
index 62ea521aa..719e0a26a 100644
--- a/src/Specific/montgomery64_2e383m187/fesub.c
+++ b/src/Specific/montgomery64_2e383m187/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffff45L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x7fffffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // Subtraction on six 64-bit limbs (index 0 = least significant): computes in1 - in2, then adds a masked constant back depending on the final borrow.
+ { const uint64_t x12 = in1[5]; // top limb of the minuend
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // bottom limb of the minuend
+ { const uint64_t x22 = in2[5]; // top limb of the subtrahend
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0]; // bottom limb of the subtrahend
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // in1[0] - in2[0], no incoming borrow
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28); // each step feeds the previous borrow into the next limb
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 = borrow out of the whole 384-bit subtraction
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // limb mask; presumably 0 when x41 == 0 and all-ones otherwise -- confirm against cmovznz's definition (not in this file)
+ { uint64_t x43 = (x42 & 0xffffffffffffff45L); // limb 0 of the add-back constant; the six masked constants read low-to-high spell 2^383 - 187 (presumably the field modulus)
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add the masked constant back, starting a fresh carry chain
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x7fffffffffffffffL); // top limb of the constant has its high bit clear
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // carry out of the top limb is deliberately dropped
+ out[0] = x45; // results are written least-significant limb first, matching the in1[]/in2[] indexing above
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m31/feadd.c b/src/Specific/montgomery64_2e383m31/feadd.c
index e13035210..7e35a49c6 100644
--- a/src/Specific/montgomery64_2e383m31/feadd.c
+++ b/src/Specific/montgomery64_2e383m31/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffe1L, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // Addition on six 64-bit limbs (index 0 = least significant): computes in1 + in2, also computes that sum minus a fixed constant, and stores one of the two per limb.
+ { const uint64_t x12 = in1[5]; // top limb of the first addend
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // bottom limb of the first addend
+ { const uint64_t x22 = in2[5]; // top limb of the second addend
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0]; // bottom limb of the second addend
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // in1[0] + in2[0], no incoming carry
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28); // each step feeds the previous carry into the next limb
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 = carry out of the 384-bit sum, kept as an extra top limb
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffe1L, &x43); // trial subtraction; the six constants read low-to-high spell 2^383 - 31 (presumably the field modulus)
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58); // top limb of the constant has its high bit clear
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // fold the addition's carry-out into the comparison; only the borrow x62 is kept, the difference limb is discarded
+ { uint64_t x63 = cmovznz(x62, x58, x40); // per-limb select on x62; presumably the subtracted limb when x62 == 0, else the plain sum -- confirm against cmovznz's definition (not in this file)
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25); // x68 is the selected bottom limb, x63 the selected top limb
+ out[0] = x68; // results are written least-significant limb first, matching the in1[]/in2[] indexing above
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m31/femul.c b/src/Specific/montgomery64_2e383m31/femul.c
index cae0fe0bd..7e6f6b03a 100644
--- a/src/Specific/montgomery64_2e383m31/femul.c
+++ b/src/Specific/montgomery64_2e383m31/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xef7bdef7bdef7bdfL, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffe1L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xef7bdef7bdef7bdfL, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffe1L, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xef7bdef7bdef7bdfL, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffe1L, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xef7bdef7bdef7bdfL, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffe1L, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xef7bdef7bdef7bdfL, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffe1L, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xef7bdef7bdef7bdfL, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffe1L, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffe1L, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xef7bdef7bdef7bdfL, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffe1L, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xef7bdef7bdef7bdfL, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffe1L, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xef7bdef7bdef7bdfL, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffe1L, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xef7bdef7bdef7bdfL, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffe1L, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xef7bdef7bdef7bdfL, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffe1L, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xef7bdef7bdef7bdfL, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffe1L, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffe1L, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m31/fenz.c b/src/Specific/montgomery64_2e383m31/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e383m31/fenz.c
+++ b/src/Specific/montgomery64_2e383m31/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m31/feopp.c b/src/Specific/montgomery64_2e383m31/feopp.c
index 3942002b4..6c2eff685 100644
--- a/src/Specific/montgomery64_2e383m31/feopp.c
+++ b/src/Specific/montgomery64_2e383m31/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffe1L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x7fffffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffe1L);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x7fffffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m31/fesub.c b/src/Specific/montgomery64_2e383m31/fesub.c
index 3e356e36c..981f1909f 100644
--- a/src/Specific/montgomery64_2e383m31/fesub.c
+++ b/src/Specific/montgomery64_2e383m31/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffe1L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x7fffffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffffe1L);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x7fffffffffffffffL);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m421/feadd.c b/src/Specific/montgomery64_2e383m421/feadd.c
index 6e928a84a..2f20efda6 100644
--- a/src/Specific/montgomery64_2e383m421/feadd.c
+++ b/src/Specific/montgomery64_2e383m421/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffe5bL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffe5bL, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m421/femul.c b/src/Specific/montgomery64_2e383m421/femul.c
index 0ebaa7ed4..2b59a2b37 100644
--- a/src/Specific/montgomery64_2e383m421/femul.c
+++ b/src/Specific/montgomery64_2e383m421/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xcebeef94fa86fe2dL, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffe5bL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xcebeef94fa86fe2dL, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffe5bL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xcebeef94fa86fe2dL, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffe5bL, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xcebeef94fa86fe2dL, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffe5bL, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xcebeef94fa86fe2dL, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffe5bL, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xcebeef94fa86fe2dL, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffe5bL, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffe5bL, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xcebeef94fa86fe2dL, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffe5bL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xcebeef94fa86fe2dL, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffe5bL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xcebeef94fa86fe2dL, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffe5bL, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xcebeef94fa86fe2dL, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffe5bL, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xcebeef94fa86fe2dL, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffe5bL, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xcebeef94fa86fe2dL, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffe5bL, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffe5bL, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m421/fenz.c b/src/Specific/montgomery64_2e383m421/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e383m421/fenz.c
+++ b/src/Specific/montgomery64_2e383m421/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m421/feopp.c b/src/Specific/montgomery64_2e383m421/feopp.c
index 3b3069cb7..b0f48857d 100644
--- a/src/Specific/montgomery64_2e383m421/feopp.c
+++ b/src/Specific/montgomery64_2e383m421/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xfffffffffffffe5bL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0x7fffffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xfffffffffffffe5bL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0x7fffffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e383m421/fesub.c b/src/Specific/montgomery64_2e383m421/fesub.c
index 2fd6b28f2..f6b127ad9 100644
--- a/src/Specific/montgomery64_2e383m421/fesub.c
+++ b/src/Specific/montgomery64_2e383m421/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xfffffffffffffe5bL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0x7fffffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xfffffffffffffe5bL);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0x7fffffffffffffffL);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c
index 83bd085ec..ba863237a 100644
--- a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffff, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffff00000000L, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xfffffffffffffffeL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffffffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffff, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffff00000000L, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xfffffffffffffffeL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffffffffffffffffL, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c
index 5fd52e99c..8b9d2ea10 100644
--- a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x100000001, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffff, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffff00000000L, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xfffffffffffffffeL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffffffffffffffffL, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x100000001, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffff, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffff00000000L, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xfffffffffffffffeL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffffffffffffffffL, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x100000001, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffff, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffff00000000L, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xfffffffffffffffeL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffffffffffffffffL, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x100000001, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffff, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffff00000000L, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xfffffffffffffffeL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffffffffffffffffL, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x100000001, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffff, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffff00000000L, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xfffffffffffffffeL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffffffffffffffffL, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x100000001, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffff, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffff00000000L, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xfffffffffffffffeL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffffffffffffffffL, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffff, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffff00000000L, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xfffffffffffffffeL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffffffffffffffffL, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x100000001, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffff, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffff00000000L, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xfffffffffffffffeL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffffffffffffffffL, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x100000001, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffff, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffff00000000L, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xfffffffffffffffeL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffffffffffffffffL, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x100000001, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffff, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffff00000000L, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xfffffffffffffffeL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffffffffffffffffL, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x100000001, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffff, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffff00000000L, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xfffffffffffffffeL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffffffffffffffffL, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x100000001, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffff, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffff00000000L, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xfffffffffffffffeL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffffffffffffffffL, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x100000001, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffff, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffff00000000L, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xfffffffffffffffeL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffffffffffffffffL, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffff, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffff00000000L, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xfffffffffffffffeL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffffffffffffffffL, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c
index 2bf91b19f..8d4647b66 100644
--- a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffff);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffff00000000L);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xfffffffffffffffeL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffff);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffff00000000L);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xfffffffffffffffeL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c
index 2896cbb98..f0e9bcdef 100644
--- a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffff);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffff00000000L);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xfffffffffffffffeL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffff);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffff00000000L);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xfffffffffffffffeL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m317/feadd.c b/src/Specific/montgomery64_2e384m317/feadd.c
index 3ee6d5d23..72c3471ad 100644
--- a/src/Specific/montgomery64_2e384m317/feadd.c
+++ b/src/Specific/montgomery64_2e384m317/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffec3L, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffffffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // out = in1 + in2 on six 64-bit limbs (index 0 is the start of the carry chain, i.e. least significant), then a selection between the raw sum and the sum minus the constant 2^384 - 317.
+ { const uint64_t x12 = in1[5]; // Copy the limbs of in1 into locals: x5 = in1[0] ... x12 = in1[5].
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5]; // Copy the limbs of in2 into locals: x15 = in2[0] ... x22 = in2[5].
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // Carry chain, limb 0: no incoming carry. Assumes _addcarryx_u64 has the x86 intrinsic semantics (returns carry-out, stores the sum) -- it is declared outside this block.
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28); // limb 1, consumes the carry from limb 0
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31); // limb 2
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34); // limb 3
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37); // limb 4
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // limb 5; x41 is the value returned for the top limb and is fed into the subtraction below as a seventh limb
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffec3L, &x43); // Borrow chain, limb 0: sum minus the constant whose limbs are 0xfffffffffffffec3 followed by five all-ones limbs, i.e. 2^384 - 317.
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46); // limb 1
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49); // limb 2
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52); // limb 3
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55); // limb 4
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffffffffffffffffL, &x58); // limb 5
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // Extends the borrow chain through x41; only the returned flag x62 is kept, the stored word goes to the never-read `_`.
+ { uint64_t x63 = cmovznz(x62, x58, x40); // Limb 5: choose between the subtracted value (x58) and the raw sum (x40) based on x62. NOTE(review): presumably picks the second argument when x62 == 0 and the third otherwise -- cmovznz is defined elsewhere, confirm.
+ { uint64_t x64 = cmovznz(x62, x55, x37); // limb 4
+ { uint64_t x65 = cmovznz(x62, x52, x34); // limb 3
+ { uint64_t x66 = cmovznz(x62, x49, x31); // limb 2
+ { uint64_t x67 = cmovznz(x62, x46, x28); // limb 1
+ { uint64_t x68 = cmovznz(x62, x43, x25); // limb 0
+ out[0] = x68; // Store limb 0 first: the selections above were computed from limb 5 down to limb 0, so the locals are written out in reverse numeric order.
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the 31 nested scopes opened one per line above
+}
diff --git a/src/Specific/montgomery64_2e384m317/femul.c b/src/Specific/montgomery64_2e384m317/femul.c
index 1516efea0..d9c063dd5 100644
--- a/src/Specific/montgomery64_2e384m317/femul.c
+++ b/src/Specific/montgomery64_2e384m317/femul.c
@@ -1,272 +1,266 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xec9e48ae6f71de15L, &_);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffec3L, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
-{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffffffffffffffffL, &x80);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
-{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
-{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
-{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
-{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
-{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
-{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
-{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xec9e48ae6f71de15L, &_);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffec3L, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
-{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
-{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffffffffffffffffL, &x197);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
-{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
-{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
-{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
-{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
-{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
-{ uint8_t x237 = (x236 + x176);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
-{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
-{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
-{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
-{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
-{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
-{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xec9e48ae6f71de15L, &_);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffec3L, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
-{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
-{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
-{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffffffffffffffffL, &x315);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
-{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
-{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
-{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
-{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
-{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
-{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
-{ uint8_t x355 = (x354 + x294);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
-{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
-{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
-{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
-{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
-{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
-{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
-{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
-{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xec9e48ae6f71de15L, &_);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffec3L, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
-{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
-{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
-{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
-{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffffffffffffffffL, &x433);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
-{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
-{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
-{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
-{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
-{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
-{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
-{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
-{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
-{ uint8_t x473 = (x472 + x412);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
-{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
-{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
-{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
-{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
-{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
-{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
-{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
-{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
-{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
-{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
-{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xec9e48ae6f71de15L, &_);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffec3L, &x536);
-{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
-{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
-{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
-{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
-{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffffffffffffffffL, &x551);
-{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
-{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
-{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
-{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
-{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
-{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
-{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
-{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
-{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
-{ uint8_t x591 = (x590 + x530);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
-{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
-{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
-{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
-{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
-{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
-{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
-{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
-{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
-{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
-{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
-{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
-{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
-{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xec9e48ae6f71de15L, &_);
-{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffec3L, &x654);
-{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
-{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
-{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
-{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
-{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffffffffffffffffL, &x669);
-{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
-{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
-{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
-{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
-{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
-{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
-{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
-{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
-{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
-{ uint8_t x709 = (x708 + x648);
-{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffec3L, &x711);
-{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
-{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
-{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
-{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
-{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffffffffffffffffL, &x726);
-{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
-{ uint64_t x731 = cmovznz(x730, x726, x707);
-{ uint64_t x732 = cmovznz(x730, x723, x704);
-{ uint64_t x733 = cmovznz(x730, x720, x701);
-{ uint64_t x734 = cmovznz(x730, x717, x698);
-{ uint64_t x735 = cmovznz(x730, x714, x695);
-{ uint64_t x736 = cmovznz(x730, x711, x692);
-out[0] = x731;
-out[1] = x732;
-out[2] = x733;
-out[3] = x734;
-out[4] = x735;
-out[5] = x736;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xec9e48ae6f71de15L, &_);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffec3L, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+ { uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffffffffffffffffL, &x80);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+ { uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+ { uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+ { uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+ { uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+ { uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+ { uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+ { uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xec9e48ae6f71de15L, &_);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffec3L, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+ { uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+ { uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffffffffffffffffL, &x197);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+ { uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+ { uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+ { uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+ { uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+ { uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+ { uint8_t x237 = (x236 + x176);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+ { uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+ { uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+ { uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+ { uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+ { uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+ { uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xec9e48ae6f71de15L, &_);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffec3L, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+ { uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+ { uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+ { uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffffffffffffffffL, &x315);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+ { uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+ { uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+ { uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+ { uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+ { uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+ { uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+ { uint8_t x355 = (x354 + x294);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+ { uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+ { uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+ { uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+ { uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+ { uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+ { uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+ { uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+ { uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xec9e48ae6f71de15L, &_);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffec3L, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+ { uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+ { uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+ { uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+ { uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffffffffffffffffL, &x433);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+ { uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+ { uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+ { uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+ { uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+ { uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+ { uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+ { uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+ { uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+ { uint8_t x473 = (x472 + x412);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+ { uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+ { uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+ { uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+ { uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+ { uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+ { uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+ { uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+ { uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+ { uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+ { uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+ { uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xec9e48ae6f71de15L, &_);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffec3L, &x536);
+ { uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+ { uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+ { uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+ { uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+ { uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffffffffffffffffL, &x551);
+ { uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+ { uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+ { uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+ { uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+ { uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+ { uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+ { uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+ { uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+ { uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+ { uint8_t x591 = (x590 + x530);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);
+ { uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+ { uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+ { uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+ { uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+ { uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+ { uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+ { uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+ { uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+ { uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+ { uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+ { uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+ { uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+ { uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xec9e48ae6f71de15L, &_);
+ { uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffec3L, &x654);
+ { uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+ { uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+ { uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+ { uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+ { uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffffffffffffffffL, &x669);
+ { uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+ { uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+ { uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+ { uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+ { uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+ { uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+ { uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+ { uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+ { uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+ { uint8_t x709 = (x708 + x648);
+ { uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffec3L, &x711);
+ { uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+ { uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+ { uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+ { uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+ { uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffffffffffffffffL, &x726);
+ { uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);
+ { uint64_t x731 = cmovznz(x730, x726, x707);
+ { uint64_t x732 = cmovznz(x730, x723, x704);
+ { uint64_t x733 = cmovznz(x730, x720, x701);
+ { uint64_t x734 = cmovznz(x730, x717, x698);
+ { uint64_t x735 = cmovznz(x730, x714, x695);
+ { uint64_t x736 = cmovznz(x730, x711, x692);
+ out[0] = x736;
+ out[1] = x735;
+ out[2] = x734;
+ out[3] = x733;
+ out[4] = x732;
+ out[5] = x731;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m317/fenz.c b/src/Specific/montgomery64_2e384m317/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e384m317/fenz.c
+++ b/src/Specific/montgomery64_2e384m317/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m317/feopp.c b/src/Specific/montgomery64_2e384m317/feopp.c
index ccfbae4d7..62734643c 100644
--- a/src/Specific/montgomery64_2e384m317/feopp.c
+++ b/src/Specific/montgomery64_2e384m317/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xfffffffffffffec3L);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xfffffffffffffec3L);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m317/fesub.c b/src/Specific/montgomery64_2e384m317/fesub.c
index bc84edeb2..fe7f61d05 100644
--- a/src/Specific/montgomery64_2e384m317/fesub.c
+++ b/src/Specific/montgomery64_2e384m317/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xfffffffffffffec3L);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xfffffffffffffec3L);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c b/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c
index bc9cb1765..0b6b23394 100644
--- a/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xfffaffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43);
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xfffaffffffffffffL, &x58);
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
+ { uint64_t x63 = cmovznz(x62, x58, x40);
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25);
+ out[0] = x68;
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/femul.c b/src/Specific/montgomery64_2e384m5x2e368m1/femul.c
index 644e34634..5b8229d0f 100644
--- a/src/Specific/montgomery64_2e384m5x2e368m1/femul.c
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/femul.c
@@ -1,266 +1,260 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x25, 0xfffaffffffffffffL, &x77);
-{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t _ = _addcarryx_u64(0x0, x92, x77, &x94);
-{ uint64_t _; uint8_t x98 = _addcarryx_u64(0x0, x25, x61, &_);
-{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x43, x79, &x100);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x46, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x49, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x52, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x55, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x58, x94, &x115);
-{ uint64_t x119; uint64_t x118 = _mulx_u64(x7, x15, &x119);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x17, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x19, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x21, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x23, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x22, &x134);
-{ uint64_t x136; uint8_t x137 = _addcarryx_u64(0x0, x119, x121, &x136);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t _ = _addcarryx_u64(0x0, x149, x134, &x151);
-{ uint64_t x154; uint8_t x155 = _addcarryx_u64(0x0, x100, x118, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x116, x151, &x172);
-{ uint64_t x176; uint64_t x175 = _mulx_u64(x154, 0xffffffffffffffffL, &x176);
-{ uint64_t x179; uint64_t x178 = _mulx_u64(x154, 0xffffffffffffffffL, &x179);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x154, 0xffffffffffffffffL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x154, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x154, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x154, 0xfffaffffffffffffL, &x191);
-{ uint64_t x193; uint8_t x194 = _addcarryx_u64(0x0, x176, x178, &x193);
-{ uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x179, x181, &x196);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t _ = _addcarryx_u64(0x0, x206, x191, &x208);
-{ uint64_t _; uint8_t x212 = _addcarryx_u64(0x0, x154, x175, &_);
-{ uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x157, x193, &x214);
-{ uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x160, x196, &x217);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x163, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x166, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x169, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x172, x208, &x229);
-{ uint8_t x231 = (x230 + x173);
-{ uint64_t x234; uint64_t x233 = _mulx_u64(x9, x15, &x234);
-{ uint64_t x237; uint64_t x236 = _mulx_u64(x9, x17, &x237);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x19, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x21, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x23, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x22, &x249);
-{ uint64_t x251; uint8_t x252 = _addcarryx_u64(0x0, x234, x236, &x251);
-{ uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t _ = _addcarryx_u64(0x0, x264, x249, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(0x0, x214, x233, &x269);
-{ uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x231, x266, &x287);
-{ uint64_t x291; uint64_t x290 = _mulx_u64(x269, 0xffffffffffffffffL, &x291);
-{ uint64_t x294; uint64_t x293 = _mulx_u64(x269, 0xffffffffffffffffL, &x294);
-{ uint64_t x297; uint64_t x296 = _mulx_u64(x269, 0xffffffffffffffffL, &x297);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x269, 0xffffffffffffffffL, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x269, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x269, 0xfffaffffffffffffL, &x306);
-{ uint64_t x308; uint8_t x309 = _addcarryx_u64(0x0, x291, x293, &x308);
-{ uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x294, x296, &x311);
-{ uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x297, x299, &x314);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t _ = _addcarryx_u64(0x0, x321, x306, &x323);
-{ uint64_t _; uint8_t x327 = _addcarryx_u64(0x0, x269, x290, &_);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x272, x308, &x329);
-{ uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x275, x311, &x332);
-{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x278, x314, &x335);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x281, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x284, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x287, x323, &x344);
-{ uint8_t x346 = (x345 + x288);
-{ uint64_t x349; uint64_t x348 = _mulx_u64(x11, x15, &x349);
-{ uint64_t x352; uint64_t x351 = _mulx_u64(x11, x17, &x352);
-{ uint64_t x355; uint64_t x354 = _mulx_u64(x11, x19, &x355);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x21, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x23, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x22, &x364);
-{ uint64_t x366; uint8_t x367 = _addcarryx_u64(0x0, x349, x351, &x366);
-{ uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
-{ uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(x373, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t _ = _addcarryx_u64(0x0, x379, x364, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(0x0, x329, x348, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
-{ uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x346, x381, &x402);
-{ uint64_t x406; uint64_t x405 = _mulx_u64(x384, 0xffffffffffffffffL, &x406);
-{ uint64_t x409; uint64_t x408 = _mulx_u64(x384, 0xffffffffffffffffL, &x409);
-{ uint64_t x412; uint64_t x411 = _mulx_u64(x384, 0xffffffffffffffffL, &x412);
-{ uint64_t x415; uint64_t x414 = _mulx_u64(x384, 0xffffffffffffffffL, &x415);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x384, 0xffffffffffffffffL, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x384, 0xfffaffffffffffffL, &x421);
-{ uint64_t x423; uint8_t x424 = _addcarryx_u64(0x0, x406, x408, &x423);
-{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
-{ uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
-{ uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x415, x417, &x432);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x418, x420, &x435);
-{ uint64_t x438; uint8_t _ = _addcarryx_u64(0x0, x436, x421, &x438);
-{ uint64_t _; uint8_t x442 = _addcarryx_u64(0x0, x384, x405, &_);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x387, x423, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x390, x426, &x447);
-{ uint64_t x450; uint8_t x451 = _addcarryx_u64(x448, x393, x429, &x450);
-{ uint64_t x453; uint8_t x454 = _addcarryx_u64(x451, x396, x432, &x453);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x399, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x402, x438, &x459);
-{ uint8_t x461 = (x460 + x403);
-{ uint64_t x464; uint64_t x463 = _mulx_u64(x13, x15, &x464);
-{ uint64_t x467; uint64_t x466 = _mulx_u64(x13, x17, &x467);
-{ uint64_t x470; uint64_t x469 = _mulx_u64(x13, x19, &x470);
-{ uint64_t x473; uint64_t x472 = _mulx_u64(x13, x21, &x473);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x23, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x22, &x479);
-{ uint64_t x481; uint8_t x482 = _addcarryx_u64(0x0, x464, x466, &x481);
-{ uint64_t x484; uint8_t x485 = _addcarryx_u64(x482, x467, x469, &x484);
-{ uint64_t x487; uint8_t x488 = _addcarryx_u64(x485, x470, x472, &x487);
-{ uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x473, x475, &x490);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x476, x478, &x493);
-{ uint64_t x496; uint8_t _ = _addcarryx_u64(0x0, x494, x479, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(0x0, x444, x463, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x450, x484, &x505);
-{ uint64_t x508; uint8_t x509 = _addcarryx_u64(x506, x453, x487, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(x509, x456, x490, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x461, x496, &x517);
-{ uint64_t x521; uint64_t x520 = _mulx_u64(x499, 0xffffffffffffffffL, &x521);
-{ uint64_t x524; uint64_t x523 = _mulx_u64(x499, 0xffffffffffffffffL, &x524);
-{ uint64_t x527; uint64_t x526 = _mulx_u64(x499, 0xffffffffffffffffL, &x527);
-{ uint64_t x530; uint64_t x529 = _mulx_u64(x499, 0xffffffffffffffffL, &x530);
-{ uint64_t x533; uint64_t x532 = _mulx_u64(x499, 0xffffffffffffffffL, &x533);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x499, 0xfffaffffffffffffL, &x536);
-{ uint64_t x538; uint8_t x539 = _addcarryx_u64(0x0, x521, x523, &x538);
-{ uint64_t x541; uint8_t x542 = _addcarryx_u64(x539, x524, x526, &x541);
-{ uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x527, x529, &x544);
-{ uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x530, x532, &x547);
-{ uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x533, x535, &x550);
-{ uint64_t x553; uint8_t _ = _addcarryx_u64(0x0, x551, x536, &x553);
-{ uint64_t _; uint8_t x557 = _addcarryx_u64(0x0, x499, x520, &_);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x502, x538, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x505, x541, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x508, x544, &x565);
-{ uint64_t x568; uint8_t x569 = _addcarryx_u64(x566, x511, x547, &x568);
-{ uint64_t x571; uint8_t x572 = _addcarryx_u64(x569, x514, x550, &x571);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x517, x553, &x574);
-{ uint8_t x576 = (x575 + x518);
-{ uint64_t x579; uint64_t x578 = _mulx_u64(x12, x15, &x579);
-{ uint64_t x582; uint64_t x581 = _mulx_u64(x12, x17, &x582);
-{ uint64_t x585; uint64_t x584 = _mulx_u64(x12, x19, &x585);
-{ uint64_t x588; uint64_t x587 = _mulx_u64(x12, x21, &x588);
-{ uint64_t x591; uint64_t x590 = _mulx_u64(x12, x23, &x591);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x22, &x594);
-{ uint64_t x596; uint8_t x597 = _addcarryx_u64(0x0, x579, x581, &x596);
-{ uint64_t x599; uint8_t x600 = _addcarryx_u64(x597, x582, x584, &x599);
-{ uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x585, x587, &x602);
-{ uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x588, x590, &x605);
-{ uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x591, x593, &x608);
-{ uint64_t x611; uint8_t _ = _addcarryx_u64(0x0, x609, x594, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(0x0, x559, x578, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x562, x596, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x565, x599, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x568, x602, &x623);
-{ uint64_t x626; uint8_t x627 = _addcarryx_u64(x624, x571, x605, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(x627, x574, x608, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x576, x611, &x632);
-{ uint64_t x636; uint64_t x635 = _mulx_u64(x614, 0xffffffffffffffffL, &x636);
-{ uint64_t x639; uint64_t x638 = _mulx_u64(x614, 0xffffffffffffffffL, &x639);
-{ uint64_t x642; uint64_t x641 = _mulx_u64(x614, 0xffffffffffffffffL, &x642);
-{ uint64_t x645; uint64_t x644 = _mulx_u64(x614, 0xffffffffffffffffL, &x645);
-{ uint64_t x648; uint64_t x647 = _mulx_u64(x614, 0xffffffffffffffffL, &x648);
-{ uint64_t x651; uint64_t x650 = _mulx_u64(x614, 0xfffaffffffffffffL, &x651);
-{ uint64_t x653; uint8_t x654 = _addcarryx_u64(0x0, x636, x638, &x653);
-{ uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x639, x641, &x656);
-{ uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x642, x644, &x659);
-{ uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x645, x647, &x662);
-{ uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x648, x650, &x665);
-{ uint64_t x668; uint8_t _ = _addcarryx_u64(0x0, x666, x651, &x668);
-{ uint64_t _; uint8_t x672 = _addcarryx_u64(0x0, x614, x635, &_);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x617, x653, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x620, x656, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x623, x659, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x626, x662, &x683);
-{ uint64_t x686; uint8_t x687 = _addcarryx_u64(x684, x629, x665, &x686);
-{ uint64_t x689; uint8_t x690 = _addcarryx_u64(x687, x632, x668, &x689);
-{ uint8_t x691 = (x690 + x633);
-{ uint64_t x693; uint8_t x694 = _subborrow_u64(0x0, x674, 0xffffffffffffffffL, &x693);
-{ uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0xffffffffffffffffL, &x696);
-{ uint64_t x699; uint8_t x700 = _subborrow_u64(x697, x680, 0xffffffffffffffffL, &x699);
-{ uint64_t x702; uint8_t x703 = _subborrow_u64(x700, x683, 0xffffffffffffffffL, &x702);
-{ uint64_t x705; uint8_t x706 = _subborrow_u64(x703, x686, 0xffffffffffffffffL, &x705);
-{ uint64_t x708; uint8_t x709 = _subborrow_u64(x706, x689, 0xfffaffffffffffffL, &x708);
-{ uint64_t _; uint8_t x712 = _subborrow_u64(x709, x691, 0x0, &_);
-{ uint64_t x713 = cmovznz(x712, x708, x689);
-{ uint64_t x714 = cmovznz(x712, x705, x686);
-{ uint64_t x715 = cmovznz(x712, x702, x683);
-{ uint64_t x716 = cmovznz(x712, x699, x680);
-{ uint64_t x717 = cmovznz(x712, x696, x677);
-{ uint64_t x718 = cmovznz(x712, x693, x674);
-out[0] = x713;
-out[1] = x714;
-out[2] = x715;
-out[3] = x716;
-out[4] = x717;
-out[5] = x718;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x25, 0xfffaffffffffffffL, &x77);
+ { uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t _ = _addcarryx_u64(0x0, x92, x77, &x94);
+ { uint64_t _; uint8_t x98 = _addcarryx_u64(0x0, x25, x61, &_);
+ { uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x43, x79, &x100);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x46, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x49, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x52, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x55, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x58, x94, &x115);
+ { uint64_t x119; uint64_t x118 = _mulx_u64(x7, x15, &x119);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x17, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x19, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x21, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x23, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x22, &x134);
+ { uint64_t x136; uint8_t x137 = _addcarryx_u64(0x0, x119, x121, &x136);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t _ = _addcarryx_u64(0x0, x149, x134, &x151);
+ { uint64_t x154; uint8_t x155 = _addcarryx_u64(0x0, x100, x118, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x116, x151, &x172);
+ { uint64_t x176; uint64_t x175 = _mulx_u64(x154, 0xffffffffffffffffL, &x176);
+ { uint64_t x179; uint64_t x178 = _mulx_u64(x154, 0xffffffffffffffffL, &x179);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x154, 0xffffffffffffffffL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x154, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x154, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x154, 0xfffaffffffffffffL, &x191);
+ { uint64_t x193; uint8_t x194 = _addcarryx_u64(0x0, x176, x178, &x193);
+ { uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x179, x181, &x196);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t _ = _addcarryx_u64(0x0, x206, x191, &x208);
+ { uint64_t _; uint8_t x212 = _addcarryx_u64(0x0, x154, x175, &_);
+ { uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x157, x193, &x214);
+ { uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x160, x196, &x217);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x163, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x166, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x169, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x172, x208, &x229);
+ { uint8_t x231 = (x230 + x173);
+ { uint64_t x234; uint64_t x233 = _mulx_u64(x9, x15, &x234);
+ { uint64_t x237; uint64_t x236 = _mulx_u64(x9, x17, &x237);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x19, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x21, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x23, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x22, &x249);
+ { uint64_t x251; uint8_t x252 = _addcarryx_u64(0x0, x234, x236, &x251);
+ { uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t _ = _addcarryx_u64(0x0, x264, x249, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(0x0, x214, x233, &x269);
+ { uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x231, x266, &x287);
+ { uint64_t x291; uint64_t x290 = _mulx_u64(x269, 0xffffffffffffffffL, &x291);
+ { uint64_t x294; uint64_t x293 = _mulx_u64(x269, 0xffffffffffffffffL, &x294);
+ { uint64_t x297; uint64_t x296 = _mulx_u64(x269, 0xffffffffffffffffL, &x297);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x269, 0xffffffffffffffffL, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x269, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x269, 0xfffaffffffffffffL, &x306);
+ { uint64_t x308; uint8_t x309 = _addcarryx_u64(0x0, x291, x293, &x308);
+ { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x294, x296, &x311);
+ { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x297, x299, &x314);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t _ = _addcarryx_u64(0x0, x321, x306, &x323);
+ { uint64_t _; uint8_t x327 = _addcarryx_u64(0x0, x269, x290, &_);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x272, x308, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x275, x311, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x278, x314, &x335);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x281, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x284, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x287, x323, &x344);
+ { uint8_t x346 = (x345 + x288);
+ { uint64_t x349; uint64_t x348 = _mulx_u64(x11, x15, &x349);
+ { uint64_t x352; uint64_t x351 = _mulx_u64(x11, x17, &x352);
+ { uint64_t x355; uint64_t x354 = _mulx_u64(x11, x19, &x355);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x21, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x23, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x22, &x364);
+ { uint64_t x366; uint8_t x367 = _addcarryx_u64(0x0, x349, x351, &x366);
+ { uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+ { uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(x373, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t _ = _addcarryx_u64(0x0, x379, x364, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(0x0, x329, x348, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+ { uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x346, x381, &x402);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x384, 0xffffffffffffffffL, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x384, 0xffffffffffffffffL, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x384, 0xffffffffffffffffL, &x412);
+ { uint64_t x415; uint64_t x414 = _mulx_u64(x384, 0xffffffffffffffffL, &x415);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x384, 0xffffffffffffffffL, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x384, 0xfffaffffffffffffL, &x421);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(0x0, x406, x408, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
+ { uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
+ { uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x415, x417, &x432);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x418, x420, &x435);
+ { uint64_t x438; uint8_t _ = _addcarryx_u64(0x0, x436, x421, &x438);
+ { uint64_t _; uint8_t x442 = _addcarryx_u64(0x0, x384, x405, &_);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x387, x423, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x390, x426, &x447);
+ { uint64_t x450; uint8_t x451 = _addcarryx_u64(x448, x393, x429, &x450);
+ { uint64_t x453; uint8_t x454 = _addcarryx_u64(x451, x396, x432, &x453);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x399, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x402, x438, &x459);
+ { uint8_t x461 = (x460 + x403);
+ { uint64_t x464; uint64_t x463 = _mulx_u64(x13, x15, &x464);
+ { uint64_t x467; uint64_t x466 = _mulx_u64(x13, x17, &x467);
+ { uint64_t x470; uint64_t x469 = _mulx_u64(x13, x19, &x470);
+ { uint64_t x473; uint64_t x472 = _mulx_u64(x13, x21, &x473);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x23, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x22, &x479);
+ { uint64_t x481; uint8_t x482 = _addcarryx_u64(0x0, x464, x466, &x481);
+ { uint64_t x484; uint8_t x485 = _addcarryx_u64(x482, x467, x469, &x484);
+ { uint64_t x487; uint8_t x488 = _addcarryx_u64(x485, x470, x472, &x487);
+ { uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x473, x475, &x490);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x476, x478, &x493);
+ { uint64_t x496; uint8_t _ = _addcarryx_u64(0x0, x494, x479, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(0x0, x444, x463, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x450, x484, &x505);
+ { uint64_t x508; uint8_t x509 = _addcarryx_u64(x506, x453, x487, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(x509, x456, x490, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x461, x496, &x517);
+ { uint64_t x521; uint64_t x520 = _mulx_u64(x499, 0xffffffffffffffffL, &x521);
+ { uint64_t x524; uint64_t x523 = _mulx_u64(x499, 0xffffffffffffffffL, &x524);
+ { uint64_t x527; uint64_t x526 = _mulx_u64(x499, 0xffffffffffffffffL, &x527);
+ { uint64_t x530; uint64_t x529 = _mulx_u64(x499, 0xffffffffffffffffL, &x530);
+ { uint64_t x533; uint64_t x532 = _mulx_u64(x499, 0xffffffffffffffffL, &x533);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x499, 0xfffaffffffffffffL, &x536);
+ { uint64_t x538; uint8_t x539 = _addcarryx_u64(0x0, x521, x523, &x538);
+ { uint64_t x541; uint8_t x542 = _addcarryx_u64(x539, x524, x526, &x541);
+ { uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x527, x529, &x544);
+ { uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x530, x532, &x547);
+ { uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x533, x535, &x550);
+ { uint64_t x553; uint8_t _ = _addcarryx_u64(0x0, x551, x536, &x553);
+ { uint64_t _; uint8_t x557 = _addcarryx_u64(0x0, x499, x520, &_);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x502, x538, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x505, x541, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x508, x544, &x565);
+ { uint64_t x568; uint8_t x569 = _addcarryx_u64(x566, x511, x547, &x568);
+ { uint64_t x571; uint8_t x572 = _addcarryx_u64(x569, x514, x550, &x571);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x517, x553, &x574);
+ { uint8_t x576 = (x575 + x518);
+ { uint64_t x579; uint64_t x578 = _mulx_u64(x12, x15, &x579);
+ { uint64_t x582; uint64_t x581 = _mulx_u64(x12, x17, &x582);
+ { uint64_t x585; uint64_t x584 = _mulx_u64(x12, x19, &x585);
+ { uint64_t x588; uint64_t x587 = _mulx_u64(x12, x21, &x588);
+ { uint64_t x591; uint64_t x590 = _mulx_u64(x12, x23, &x591);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x22, &x594);
+ { uint64_t x596; uint8_t x597 = _addcarryx_u64(0x0, x579, x581, &x596);
+ { uint64_t x599; uint8_t x600 = _addcarryx_u64(x597, x582, x584, &x599);
+ { uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x585, x587, &x602);
+ { uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x588, x590, &x605);
+ { uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x591, x593, &x608);
+ { uint64_t x611; uint8_t _ = _addcarryx_u64(0x0, x609, x594, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(0x0, x559, x578, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x562, x596, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x565, x599, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x568, x602, &x623);
+ { uint64_t x626; uint8_t x627 = _addcarryx_u64(x624, x571, x605, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(x627, x574, x608, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x576, x611, &x632);
+ { uint64_t x636; uint64_t x635 = _mulx_u64(x614, 0xffffffffffffffffL, &x636);
+ { uint64_t x639; uint64_t x638 = _mulx_u64(x614, 0xffffffffffffffffL, &x639);
+ { uint64_t x642; uint64_t x641 = _mulx_u64(x614, 0xffffffffffffffffL, &x642);
+ { uint64_t x645; uint64_t x644 = _mulx_u64(x614, 0xffffffffffffffffL, &x645);
+ { uint64_t x648; uint64_t x647 = _mulx_u64(x614, 0xffffffffffffffffL, &x648);
+ { uint64_t x651; uint64_t x650 = _mulx_u64(x614, 0xfffaffffffffffffL, &x651);
+ { uint64_t x653; uint8_t x654 = _addcarryx_u64(0x0, x636, x638, &x653);
+ { uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x639, x641, &x656);
+ { uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x642, x644, &x659);
+ { uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x645, x647, &x662);
+ { uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x648, x650, &x665);
+ { uint64_t x668; uint8_t _ = _addcarryx_u64(0x0, x666, x651, &x668);
+ { uint64_t _; uint8_t x672 = _addcarryx_u64(0x0, x614, x635, &_);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x617, x653, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x620, x656, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x623, x659, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x626, x662, &x683);
+ { uint64_t x686; uint8_t x687 = _addcarryx_u64(x684, x629, x665, &x686);
+ { uint64_t x689; uint8_t x690 = _addcarryx_u64(x687, x632, x668, &x689);
+ { uint8_t x691 = (x690 + x633);
+ { uint64_t x693; uint8_t x694 = _subborrow_u64(0x0, x674, 0xffffffffffffffffL, &x693);
+ { uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0xffffffffffffffffL, &x696);
+ { uint64_t x699; uint8_t x700 = _subborrow_u64(x697, x680, 0xffffffffffffffffL, &x699);
+ { uint64_t x702; uint8_t x703 = _subborrow_u64(x700, x683, 0xffffffffffffffffL, &x702);
+ { uint64_t x705; uint8_t x706 = _subborrow_u64(x703, x686, 0xffffffffffffffffL, &x705);
+ { uint64_t x708; uint8_t x709 = _subborrow_u64(x706, x689, 0xfffaffffffffffffL, &x708);
+ { uint64_t _; uint8_t x712 = _subborrow_u64(x709, x691, 0x0, &_);
+ { uint64_t x713 = cmovznz(x712, x708, x689);
+ { uint64_t x714 = cmovznz(x712, x705, x686);
+ { uint64_t x715 = cmovznz(x712, x702, x683);
+ { uint64_t x716 = cmovznz(x712, x699, x680);
+ { uint64_t x717 = cmovznz(x712, x696, x677);
+ { uint64_t x718 = cmovznz(x712, x693, x674);
+ out[0] = x718;
+ out[1] = x717;
+ out[2] = x716;
+ out[3] = x715;
+ out[4] = x714;
+ out[5] = x713;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c b/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c b/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c
index 4b1126029..b6bdcfdb8 100644
--- a/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0xfffaffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0xfffaffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c b/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c
index 18a509846..5625e228f 100644
--- a/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0xfffaffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // out[] = (in1[] - in2[]) over six 64-bit limbs, then a masked six-limb constant is added back; by its name this is field subtraction (assumption, confirm against the generator)
+ { const uint64_t x12 = in1[5]; // last in1 limb consumed by the borrow chain, so presumably the most significant
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // first in1 limb consumed by the borrow chain
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // chain start: borrow-in 0, operands in1[0] and in2[0]; per Intel's documented semantics this is x5 - x15
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28); // each step feeds the previous step's returned borrow flag back in
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 is the final borrow out of the six-limb subtraction
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // cmovznz is defined outside this view; presumably yields all-ones when x41 != 0 and 0 otherwise (confirm)
+ { uint64_t x43 = (x42 & 0xffffffffffffffffL); // limb 0 of the constant, kept or zeroed by the mask x42
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // start of the carry chain that adds the masked constant to the difference
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0xfffaffffffffffffL); // limb 5 of the constant; read least-significant-first, limbs 0..5 equal 2^384 - 5*2^368 - 1, matching the directory name
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the final carry-out lands in `_` and is never read
+ out[0] = x45; // results are stored in the same limb order in which the inputs were consumed
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c b/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c
index 7344abfcb..bb6d2bd92 100644
--- a/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
-{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43);
-{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
-{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
-{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
-{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
-{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xb0ffffffffffffffL, &x58);
-{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);
-{ uint64_t x63 = cmovznz(x62, x58, x40);
-{ uint64_t x64 = cmovznz(x62, x55, x37);
-{ uint64_t x65 = cmovznz(x62, x52, x34);
-{ uint64_t x66 = cmovznz(x62, x49, x31);
-{ uint64_t x67 = cmovznz(x62, x46, x28);
-{ uint64_t x68 = cmovznz(x62, x43, x25);
-out[0] = x63;
-out[1] = x64;
-out[2] = x65;
-out[3] = x66;
-out[4] = x67;
-out[5] = x68;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feadd(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) { // out[] = in1[] + in2[] over six 64-bit limbs, or that sum minus a six-limb constant, chosen by a final borrow flag; by its name this is field addition (assumption, confirm against the generator)
+ { const uint64_t x12 = in1[5]; // last in1 limb consumed by the carry chain, so presumably the most significant
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0]; // first in1 limb consumed by the carry chain
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // chain start: carry-in 0, in1[0] + in2[0]
+ { uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28); // each step feeds the previous step's returned carry flag back in
+ { uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 is the carry out of the six-limb sum and is used as a seventh limb below
+ { uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43); // start of the trial subtraction of the constant from the raw sum
+ { uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+ { uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+ { uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+ { uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+ { uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xb0ffffffffffffffL, &x58); // limb 5 of the constant; read least-significant-first, the six constants equal 2^384 - 79*2^376 - 1, matching the directory name
+ { uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // propagates the borrow through the carry limb; only the returned flag x62 is used, the word in `_` is discarded
+ { uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz is defined outside this view; presumably returns the 2nd argument when x62 == 0 and the 3rd otherwise, i.e. keeps the raw sum when the trial subtraction borrowed (confirm)
+ { uint64_t x64 = cmovznz(x62, x55, x37);
+ { uint64_t x65 = cmovznz(x62, x52, x34);
+ { uint64_t x66 = cmovznz(x62, x49, x31);
+ { uint64_t x67 = cmovznz(x62, x46, x28);
+ { uint64_t x68 = cmovznz(x62, x43, x25); // selection for limb 0 (x43 and x25 both derive from in1[0] + in2[0])
+ out[0] = x68; // the selections were computed from limb 5 down to limb 0, so they are stored in reverse order of computation
+ out[1] = x67;
+ out[2] = x66;
+ out[3] = x65;
+ out[4] = x64;
+ out[5] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/femul.c b/src/Specific/montgomery64_2e384m79x2e376m1/femul.c
index 15d39e5aa..6bf94c68b 100644
--- a/src/Specific/montgomery64_2e384m79x2e376m1/femul.c
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/femul.c
@@ -1,266 +1,260 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
-{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
-{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
-{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
-{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
-{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
-{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
-{ uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xffffffffffffffffL, &x68);
-{ uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
-{ uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
-{ uint64_t x77; uint64_t x76 = _mulx_u64(x25, 0xb0ffffffffffffffL, &x77);
-{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79);
-{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
-{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
-{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
-{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
-{ uint64_t x94; uint8_t _ = _addcarryx_u64(0x0, x92, x77, &x94);
-{ uint64_t _; uint8_t x98 = _addcarryx_u64(0x0, x25, x61, &_);
-{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x43, x79, &x100);
-{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x46, x82, &x103);
-{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x49, x85, &x106);
-{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x52, x88, &x109);
-{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x55, x91, &x112);
-{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x58, x94, &x115);
-{ uint64_t x119; uint64_t x118 = _mulx_u64(x7, x15, &x119);
-{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x17, &x122);
-{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x19, &x125);
-{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x21, &x128);
-{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x23, &x131);
-{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x22, &x134);
-{ uint64_t x136; uint8_t x137 = _addcarryx_u64(0x0, x119, x121, &x136);
-{ uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
-{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
-{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
-{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
-{ uint64_t x151; uint8_t _ = _addcarryx_u64(0x0, x149, x134, &x151);
-{ uint64_t x154; uint8_t x155 = _addcarryx_u64(0x0, x100, x118, &x154);
-{ uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
-{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
-{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
-{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
-{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
-{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x116, x151, &x172);
-{ uint64_t x176; uint64_t x175 = _mulx_u64(x154, 0xffffffffffffffffL, &x176);
-{ uint64_t x179; uint64_t x178 = _mulx_u64(x154, 0xffffffffffffffffL, &x179);
-{ uint64_t x182; uint64_t x181 = _mulx_u64(x154, 0xffffffffffffffffL, &x182);
-{ uint64_t x185; uint64_t x184 = _mulx_u64(x154, 0xffffffffffffffffL, &x185);
-{ uint64_t x188; uint64_t x187 = _mulx_u64(x154, 0xffffffffffffffffL, &x188);
-{ uint64_t x191; uint64_t x190 = _mulx_u64(x154, 0xb0ffffffffffffffL, &x191);
-{ uint64_t x193; uint8_t x194 = _addcarryx_u64(0x0, x176, x178, &x193);
-{ uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x179, x181, &x196);
-{ uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x182, x184, &x199);
-{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
-{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
-{ uint64_t x208; uint8_t _ = _addcarryx_u64(0x0, x206, x191, &x208);
-{ uint64_t _; uint8_t x212 = _addcarryx_u64(0x0, x154, x175, &_);
-{ uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x157, x193, &x214);
-{ uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x160, x196, &x217);
-{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x163, x199, &x220);
-{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x166, x202, &x223);
-{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x169, x205, &x226);
-{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x172, x208, &x229);
-{ uint8_t x231 = (x230 + x173);
-{ uint64_t x234; uint64_t x233 = _mulx_u64(x9, x15, &x234);
-{ uint64_t x237; uint64_t x236 = _mulx_u64(x9, x17, &x237);
-{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x19, &x240);
-{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x21, &x243);
-{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x23, &x246);
-{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x22, &x249);
-{ uint64_t x251; uint8_t x252 = _addcarryx_u64(0x0, x234, x236, &x251);
-{ uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
-{ uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
-{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
-{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
-{ uint64_t x266; uint8_t _ = _addcarryx_u64(0x0, x264, x249, &x266);
-{ uint64_t x269; uint8_t x270 = _addcarryx_u64(0x0, x214, x233, &x269);
-{ uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
-{ uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
-{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
-{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
-{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
-{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x231, x266, &x287);
-{ uint64_t x291; uint64_t x290 = _mulx_u64(x269, 0xffffffffffffffffL, &x291);
-{ uint64_t x294; uint64_t x293 = _mulx_u64(x269, 0xffffffffffffffffL, &x294);
-{ uint64_t x297; uint64_t x296 = _mulx_u64(x269, 0xffffffffffffffffL, &x297);
-{ uint64_t x300; uint64_t x299 = _mulx_u64(x269, 0xffffffffffffffffL, &x300);
-{ uint64_t x303; uint64_t x302 = _mulx_u64(x269, 0xffffffffffffffffL, &x303);
-{ uint64_t x306; uint64_t x305 = _mulx_u64(x269, 0xb0ffffffffffffffL, &x306);
-{ uint64_t x308; uint8_t x309 = _addcarryx_u64(0x0, x291, x293, &x308);
-{ uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x294, x296, &x311);
-{ uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x297, x299, &x314);
-{ uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x300, x302, &x317);
-{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
-{ uint64_t x323; uint8_t _ = _addcarryx_u64(0x0, x321, x306, &x323);
-{ uint64_t _; uint8_t x327 = _addcarryx_u64(0x0, x269, x290, &_);
-{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x272, x308, &x329);
-{ uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x275, x311, &x332);
-{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x278, x314, &x335);
-{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x281, x317, &x338);
-{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x284, x320, &x341);
-{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x287, x323, &x344);
-{ uint8_t x346 = (x345 + x288);
-{ uint64_t x349; uint64_t x348 = _mulx_u64(x11, x15, &x349);
-{ uint64_t x352; uint64_t x351 = _mulx_u64(x11, x17, &x352);
-{ uint64_t x355; uint64_t x354 = _mulx_u64(x11, x19, &x355);
-{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x21, &x358);
-{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x23, &x361);
-{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x22, &x364);
-{ uint64_t x366; uint8_t x367 = _addcarryx_u64(0x0, x349, x351, &x366);
-{ uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
-{ uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
-{ uint64_t x375; uint8_t x376 = _addcarryx_u64(x373, x358, x360, &x375);
-{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
-{ uint64_t x381; uint8_t _ = _addcarryx_u64(0x0, x379, x364, &x381);
-{ uint64_t x384; uint8_t x385 = _addcarryx_u64(0x0, x329, x348, &x384);
-{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
-{ uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
-{ uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
-{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
-{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
-{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x346, x381, &x402);
-{ uint64_t x406; uint64_t x405 = _mulx_u64(x384, 0xffffffffffffffffL, &x406);
-{ uint64_t x409; uint64_t x408 = _mulx_u64(x384, 0xffffffffffffffffL, &x409);
-{ uint64_t x412; uint64_t x411 = _mulx_u64(x384, 0xffffffffffffffffL, &x412);
-{ uint64_t x415; uint64_t x414 = _mulx_u64(x384, 0xffffffffffffffffL, &x415);
-{ uint64_t x418; uint64_t x417 = _mulx_u64(x384, 0xffffffffffffffffL, &x418);
-{ uint64_t x421; uint64_t x420 = _mulx_u64(x384, 0xb0ffffffffffffffL, &x421);
-{ uint64_t x423; uint8_t x424 = _addcarryx_u64(0x0, x406, x408, &x423);
-{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
-{ uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
-{ uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x415, x417, &x432);
-{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x418, x420, &x435);
-{ uint64_t x438; uint8_t _ = _addcarryx_u64(0x0, x436, x421, &x438);
-{ uint64_t _; uint8_t x442 = _addcarryx_u64(0x0, x384, x405, &_);
-{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x387, x423, &x444);
-{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x390, x426, &x447);
-{ uint64_t x450; uint8_t x451 = _addcarryx_u64(x448, x393, x429, &x450);
-{ uint64_t x453; uint8_t x454 = _addcarryx_u64(x451, x396, x432, &x453);
-{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x399, x435, &x456);
-{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x402, x438, &x459);
-{ uint8_t x461 = (x460 + x403);
-{ uint64_t x464; uint64_t x463 = _mulx_u64(x13, x15, &x464);
-{ uint64_t x467; uint64_t x466 = _mulx_u64(x13, x17, &x467);
-{ uint64_t x470; uint64_t x469 = _mulx_u64(x13, x19, &x470);
-{ uint64_t x473; uint64_t x472 = _mulx_u64(x13, x21, &x473);
-{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x23, &x476);
-{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x22, &x479);
-{ uint64_t x481; uint8_t x482 = _addcarryx_u64(0x0, x464, x466, &x481);
-{ uint64_t x484; uint8_t x485 = _addcarryx_u64(x482, x467, x469, &x484);
-{ uint64_t x487; uint8_t x488 = _addcarryx_u64(x485, x470, x472, &x487);
-{ uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x473, x475, &x490);
-{ uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x476, x478, &x493);
-{ uint64_t x496; uint8_t _ = _addcarryx_u64(0x0, x494, x479, &x496);
-{ uint64_t x499; uint8_t x500 = _addcarryx_u64(0x0, x444, x463, &x499);
-{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
-{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x450, x484, &x505);
-{ uint64_t x508; uint8_t x509 = _addcarryx_u64(x506, x453, x487, &x508);
-{ uint64_t x511; uint8_t x512 = _addcarryx_u64(x509, x456, x490, &x511);
-{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
-{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x461, x496, &x517);
-{ uint64_t x521; uint64_t x520 = _mulx_u64(x499, 0xffffffffffffffffL, &x521);
-{ uint64_t x524; uint64_t x523 = _mulx_u64(x499, 0xffffffffffffffffL, &x524);
-{ uint64_t x527; uint64_t x526 = _mulx_u64(x499, 0xffffffffffffffffL, &x527);
-{ uint64_t x530; uint64_t x529 = _mulx_u64(x499, 0xffffffffffffffffL, &x530);
-{ uint64_t x533; uint64_t x532 = _mulx_u64(x499, 0xffffffffffffffffL, &x533);
-{ uint64_t x536; uint64_t x535 = _mulx_u64(x499, 0xb0ffffffffffffffL, &x536);
-{ uint64_t x538; uint8_t x539 = _addcarryx_u64(0x0, x521, x523, &x538);
-{ uint64_t x541; uint8_t x542 = _addcarryx_u64(x539, x524, x526, &x541);
-{ uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x527, x529, &x544);
-{ uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x530, x532, &x547);
-{ uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x533, x535, &x550);
-{ uint64_t x553; uint8_t _ = _addcarryx_u64(0x0, x551, x536, &x553);
-{ uint64_t _; uint8_t x557 = _addcarryx_u64(0x0, x499, x520, &_);
-{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x502, x538, &x559);
-{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x505, x541, &x562);
-{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x508, x544, &x565);
-{ uint64_t x568; uint8_t x569 = _addcarryx_u64(x566, x511, x547, &x568);
-{ uint64_t x571; uint8_t x572 = _addcarryx_u64(x569, x514, x550, &x571);
-{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x517, x553, &x574);
-{ uint8_t x576 = (x575 + x518);
-{ uint64_t x579; uint64_t x578 = _mulx_u64(x12, x15, &x579);
-{ uint64_t x582; uint64_t x581 = _mulx_u64(x12, x17, &x582);
-{ uint64_t x585; uint64_t x584 = _mulx_u64(x12, x19, &x585);
-{ uint64_t x588; uint64_t x587 = _mulx_u64(x12, x21, &x588);
-{ uint64_t x591; uint64_t x590 = _mulx_u64(x12, x23, &x591);
-{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x22, &x594);
-{ uint64_t x596; uint8_t x597 = _addcarryx_u64(0x0, x579, x581, &x596);
-{ uint64_t x599; uint8_t x600 = _addcarryx_u64(x597, x582, x584, &x599);
-{ uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x585, x587, &x602);
-{ uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x588, x590, &x605);
-{ uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x591, x593, &x608);
-{ uint64_t x611; uint8_t _ = _addcarryx_u64(0x0, x609, x594, &x611);
-{ uint64_t x614; uint8_t x615 = _addcarryx_u64(0x0, x559, x578, &x614);
-{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x562, x596, &x617);
-{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x565, x599, &x620);
-{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x568, x602, &x623);
-{ uint64_t x626; uint8_t x627 = _addcarryx_u64(x624, x571, x605, &x626);
-{ uint64_t x629; uint8_t x630 = _addcarryx_u64(x627, x574, x608, &x629);
-{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x576, x611, &x632);
-{ uint64_t x636; uint64_t x635 = _mulx_u64(x614, 0xffffffffffffffffL, &x636);
-{ uint64_t x639; uint64_t x638 = _mulx_u64(x614, 0xffffffffffffffffL, &x639);
-{ uint64_t x642; uint64_t x641 = _mulx_u64(x614, 0xffffffffffffffffL, &x642);
-{ uint64_t x645; uint64_t x644 = _mulx_u64(x614, 0xffffffffffffffffL, &x645);
-{ uint64_t x648; uint64_t x647 = _mulx_u64(x614, 0xffffffffffffffffL, &x648);
-{ uint64_t x651; uint64_t x650 = _mulx_u64(x614, 0xb0ffffffffffffffL, &x651);
-{ uint64_t x653; uint8_t x654 = _addcarryx_u64(0x0, x636, x638, &x653);
-{ uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x639, x641, &x656);
-{ uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x642, x644, &x659);
-{ uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x645, x647, &x662);
-{ uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x648, x650, &x665);
-{ uint64_t x668; uint8_t _ = _addcarryx_u64(0x0, x666, x651, &x668);
-{ uint64_t _; uint8_t x672 = _addcarryx_u64(0x0, x614, x635, &_);
-{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x617, x653, &x674);
-{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x620, x656, &x677);
-{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x623, x659, &x680);
-{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x626, x662, &x683);
-{ uint64_t x686; uint8_t x687 = _addcarryx_u64(x684, x629, x665, &x686);
-{ uint64_t x689; uint8_t x690 = _addcarryx_u64(x687, x632, x668, &x689);
-{ uint8_t x691 = (x690 + x633);
-{ uint64_t x693; uint8_t x694 = _subborrow_u64(0x0, x674, 0xffffffffffffffffL, &x693);
-{ uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0xffffffffffffffffL, &x696);
-{ uint64_t x699; uint8_t x700 = _subborrow_u64(x697, x680, 0xffffffffffffffffL, &x699);
-{ uint64_t x702; uint8_t x703 = _subborrow_u64(x700, x683, 0xffffffffffffffffL, &x702);
-{ uint64_t x705; uint8_t x706 = _subborrow_u64(x703, x686, 0xffffffffffffffffL, &x705);
-{ uint64_t x708; uint8_t x709 = _subborrow_u64(x706, x689, 0xb0ffffffffffffffL, &x708);
-{ uint64_t _; uint8_t x712 = _subborrow_u64(x709, x691, 0x0, &_);
-{ uint64_t x713 = cmovznz(x712, x708, x689);
-{ uint64_t x714 = cmovznz(x712, x705, x686);
-{ uint64_t x715 = cmovznz(x712, x702, x683);
-{ uint64_t x716 = cmovznz(x712, x699, x680);
-{ uint64_t x717 = cmovznz(x712, x696, x677);
-{ uint64_t x718 = cmovznz(x712, x693, x674);
-out[0] = x713;
-out[1] = x714;
-out[2] = x715;
-out[3] = x716;
-out[4] = x717;
-out[5] = x718;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);
+ { uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+ { uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+ { uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+ { uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+ { uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+ { uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+ { uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xffffffffffffffffL, &x68);
+ { uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
+ { uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
+ { uint64_t x77; uint64_t x76 = _mulx_u64(x25, 0xb0ffffffffffffffL, &x77);
+ { uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79);
+ { uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
+ { uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+ { uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+ { uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+ { uint64_t x94; uint8_t _ = _addcarryx_u64(0x0, x92, x77, &x94);
+ { uint64_t _; uint8_t x98 = _addcarryx_u64(0x0, x25, x61, &_);
+ { uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x43, x79, &x100);
+ { uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x46, x82, &x103);
+ { uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x49, x85, &x106);
+ { uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x52, x88, &x109);
+ { uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x55, x91, &x112);
+ { uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x58, x94, &x115);
+ { uint64_t x119; uint64_t x118 = _mulx_u64(x7, x15, &x119);
+ { uint64_t x122; uint64_t x121 = _mulx_u64(x7, x17, &x122);
+ { uint64_t x125; uint64_t x124 = _mulx_u64(x7, x19, &x125);
+ { uint64_t x128; uint64_t x127 = _mulx_u64(x7, x21, &x128);
+ { uint64_t x131; uint64_t x130 = _mulx_u64(x7, x23, &x131);
+ { uint64_t x134; uint64_t x133 = _mulx_u64(x7, x22, &x134);
+ { uint64_t x136; uint8_t x137 = _addcarryx_u64(0x0, x119, x121, &x136);
+ { uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
+ { uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+ { uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+ { uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+ { uint64_t x151; uint8_t _ = _addcarryx_u64(0x0, x149, x134, &x151);
+ { uint64_t x154; uint8_t x155 = _addcarryx_u64(0x0, x100, x118, &x154);
+ { uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
+ { uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+ { uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+ { uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+ { uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+ { uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x116, x151, &x172);
+ { uint64_t x176; uint64_t x175 = _mulx_u64(x154, 0xffffffffffffffffL, &x176);
+ { uint64_t x179; uint64_t x178 = _mulx_u64(x154, 0xffffffffffffffffL, &x179);
+ { uint64_t x182; uint64_t x181 = _mulx_u64(x154, 0xffffffffffffffffL, &x182);
+ { uint64_t x185; uint64_t x184 = _mulx_u64(x154, 0xffffffffffffffffL, &x185);
+ { uint64_t x188; uint64_t x187 = _mulx_u64(x154, 0xffffffffffffffffL, &x188);
+ { uint64_t x191; uint64_t x190 = _mulx_u64(x154, 0xb0ffffffffffffffL, &x191);
+ { uint64_t x193; uint8_t x194 = _addcarryx_u64(0x0, x176, x178, &x193);
+ { uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x179, x181, &x196);
+ { uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x182, x184, &x199);
+ { uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+ { uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+ { uint64_t x208; uint8_t _ = _addcarryx_u64(0x0, x206, x191, &x208);
+ { uint64_t _; uint8_t x212 = _addcarryx_u64(0x0, x154, x175, &_);
+ { uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x157, x193, &x214);
+ { uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x160, x196, &x217);
+ { uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x163, x199, &x220);
+ { uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x166, x202, &x223);
+ { uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x169, x205, &x226);
+ { uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x172, x208, &x229);
+ { uint8_t x231 = (x230 + x173);
+ { uint64_t x234; uint64_t x233 = _mulx_u64(x9, x15, &x234);
+ { uint64_t x237; uint64_t x236 = _mulx_u64(x9, x17, &x237);
+ { uint64_t x240; uint64_t x239 = _mulx_u64(x9, x19, &x240);
+ { uint64_t x243; uint64_t x242 = _mulx_u64(x9, x21, &x243);
+ { uint64_t x246; uint64_t x245 = _mulx_u64(x9, x23, &x246);
+ { uint64_t x249; uint64_t x248 = _mulx_u64(x9, x22, &x249);
+ { uint64_t x251; uint8_t x252 = _addcarryx_u64(0x0, x234, x236, &x251);
+ { uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
+ { uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
+ { uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+ { uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+ { uint64_t x266; uint8_t _ = _addcarryx_u64(0x0, x264, x249, &x266);
+ { uint64_t x269; uint8_t x270 = _addcarryx_u64(0x0, x214, x233, &x269);
+ { uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
+ { uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
+ { uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+ { uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+ { uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+ { uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x231, x266, &x287);
+ { uint64_t x291; uint64_t x290 = _mulx_u64(x269, 0xffffffffffffffffL, &x291);
+ { uint64_t x294; uint64_t x293 = _mulx_u64(x269, 0xffffffffffffffffL, &x294);
+ { uint64_t x297; uint64_t x296 = _mulx_u64(x269, 0xffffffffffffffffL, &x297);
+ { uint64_t x300; uint64_t x299 = _mulx_u64(x269, 0xffffffffffffffffL, &x300);
+ { uint64_t x303; uint64_t x302 = _mulx_u64(x269, 0xffffffffffffffffL, &x303);
+ { uint64_t x306; uint64_t x305 = _mulx_u64(x269, 0xb0ffffffffffffffL, &x306);
+ { uint64_t x308; uint8_t x309 = _addcarryx_u64(0x0, x291, x293, &x308);
+ { uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x294, x296, &x311);
+ { uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x297, x299, &x314);
+ { uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x300, x302, &x317);
+ { uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+ { uint64_t x323; uint8_t _ = _addcarryx_u64(0x0, x321, x306, &x323);
+ { uint64_t _; uint8_t x327 = _addcarryx_u64(0x0, x269, x290, &_);
+ { uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x272, x308, &x329);
+ { uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x275, x311, &x332);
+ { uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x278, x314, &x335);
+ { uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x281, x317, &x338);
+ { uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x284, x320, &x341);
+ { uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x287, x323, &x344);
+ { uint8_t x346 = (x345 + x288);
+ { uint64_t x349; uint64_t x348 = _mulx_u64(x11, x15, &x349);
+ { uint64_t x352; uint64_t x351 = _mulx_u64(x11, x17, &x352);
+ { uint64_t x355; uint64_t x354 = _mulx_u64(x11, x19, &x355);
+ { uint64_t x358; uint64_t x357 = _mulx_u64(x11, x21, &x358);
+ { uint64_t x361; uint64_t x360 = _mulx_u64(x11, x23, &x361);
+ { uint64_t x364; uint64_t x363 = _mulx_u64(x11, x22, &x364);
+ { uint64_t x366; uint8_t x367 = _addcarryx_u64(0x0, x349, x351, &x366);
+ { uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+ { uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
+ { uint64_t x375; uint8_t x376 = _addcarryx_u64(x373, x358, x360, &x375);
+ { uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+ { uint64_t x381; uint8_t _ = _addcarryx_u64(0x0, x379, x364, &x381);
+ { uint64_t x384; uint8_t x385 = _addcarryx_u64(0x0, x329, x348, &x384);
+ { uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+ { uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+ { uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
+ { uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+ { uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+ { uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x346, x381, &x402);
+ { uint64_t x406; uint64_t x405 = _mulx_u64(x384, 0xffffffffffffffffL, &x406);
+ { uint64_t x409; uint64_t x408 = _mulx_u64(x384, 0xffffffffffffffffL, &x409);
+ { uint64_t x412; uint64_t x411 = _mulx_u64(x384, 0xffffffffffffffffL, &x412);
+ { uint64_t x415; uint64_t x414 = _mulx_u64(x384, 0xffffffffffffffffL, &x415);
+ { uint64_t x418; uint64_t x417 = _mulx_u64(x384, 0xffffffffffffffffL, &x418);
+ { uint64_t x421; uint64_t x420 = _mulx_u64(x384, 0xb0ffffffffffffffL, &x421);
+ { uint64_t x423; uint8_t x424 = _addcarryx_u64(0x0, x406, x408, &x423);
+ { uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
+ { uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
+ { uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x415, x417, &x432);
+ { uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x418, x420, &x435);
+ { uint64_t x438; uint8_t _ = _addcarryx_u64(0x0, x436, x421, &x438);
+ { uint64_t _; uint8_t x442 = _addcarryx_u64(0x0, x384, x405, &_);
+ { uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x387, x423, &x444);
+ { uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x390, x426, &x447);
+ { uint64_t x450; uint8_t x451 = _addcarryx_u64(x448, x393, x429, &x450);
+ { uint64_t x453; uint8_t x454 = _addcarryx_u64(x451, x396, x432, &x453);
+ { uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x399, x435, &x456);
+ { uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x402, x438, &x459);
+ { uint8_t x461 = (x460 + x403);
+ { uint64_t x464; uint64_t x463 = _mulx_u64(x13, x15, &x464);
+ { uint64_t x467; uint64_t x466 = _mulx_u64(x13, x17, &x467);
+ { uint64_t x470; uint64_t x469 = _mulx_u64(x13, x19, &x470);
+ { uint64_t x473; uint64_t x472 = _mulx_u64(x13, x21, &x473);
+ { uint64_t x476; uint64_t x475 = _mulx_u64(x13, x23, &x476);
+ { uint64_t x479; uint64_t x478 = _mulx_u64(x13, x22, &x479);
+ { uint64_t x481; uint8_t x482 = _addcarryx_u64(0x0, x464, x466, &x481);
+ { uint64_t x484; uint8_t x485 = _addcarryx_u64(x482, x467, x469, &x484);
+ { uint64_t x487; uint8_t x488 = _addcarryx_u64(x485, x470, x472, &x487);
+ { uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x473, x475, &x490);
+ { uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x476, x478, &x493);
+ { uint64_t x496; uint8_t _ = _addcarryx_u64(0x0, x494, x479, &x496);
+ { uint64_t x499; uint8_t x500 = _addcarryx_u64(0x0, x444, x463, &x499);
+ { uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
+ { uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x450, x484, &x505);
+ { uint64_t x508; uint8_t x509 = _addcarryx_u64(x506, x453, x487, &x508);
+ { uint64_t x511; uint8_t x512 = _addcarryx_u64(x509, x456, x490, &x511);
+ { uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+ { uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x461, x496, &x517);
+ { uint64_t x521; uint64_t x520 = _mulx_u64(x499, 0xffffffffffffffffL, &x521);
+ { uint64_t x524; uint64_t x523 = _mulx_u64(x499, 0xffffffffffffffffL, &x524);
+ { uint64_t x527; uint64_t x526 = _mulx_u64(x499, 0xffffffffffffffffL, &x527);
+ { uint64_t x530; uint64_t x529 = _mulx_u64(x499, 0xffffffffffffffffL, &x530);
+ { uint64_t x533; uint64_t x532 = _mulx_u64(x499, 0xffffffffffffffffL, &x533);
+ { uint64_t x536; uint64_t x535 = _mulx_u64(x499, 0xb0ffffffffffffffL, &x536);
+ { uint64_t x538; uint8_t x539 = _addcarryx_u64(0x0, x521, x523, &x538);
+ { uint64_t x541; uint8_t x542 = _addcarryx_u64(x539, x524, x526, &x541);
+ { uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x527, x529, &x544);
+ { uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x530, x532, &x547);
+ { uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x533, x535, &x550);
+ { uint64_t x553; uint8_t _ = _addcarryx_u64(0x0, x551, x536, &x553);
+ { uint64_t _; uint8_t x557 = _addcarryx_u64(0x0, x499, x520, &_);
+ { uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x502, x538, &x559);
+ { uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x505, x541, &x562);
+ { uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x508, x544, &x565);
+ { uint64_t x568; uint8_t x569 = _addcarryx_u64(x566, x511, x547, &x568);
+ { uint64_t x571; uint8_t x572 = _addcarryx_u64(x569, x514, x550, &x571);
+ { uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x517, x553, &x574);
+ { uint8_t x576 = (x575 + x518);
+ { uint64_t x579; uint64_t x578 = _mulx_u64(x12, x15, &x579);
+ { uint64_t x582; uint64_t x581 = _mulx_u64(x12, x17, &x582);
+ { uint64_t x585; uint64_t x584 = _mulx_u64(x12, x19, &x585);
+ { uint64_t x588; uint64_t x587 = _mulx_u64(x12, x21, &x588);
+ { uint64_t x591; uint64_t x590 = _mulx_u64(x12, x23, &x591);
+ { uint64_t x594; uint64_t x593 = _mulx_u64(x12, x22, &x594);
+ { uint64_t x596; uint8_t x597 = _addcarryx_u64(0x0, x579, x581, &x596);
+ { uint64_t x599; uint8_t x600 = _addcarryx_u64(x597, x582, x584, &x599);
+ { uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x585, x587, &x602);
+ { uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x588, x590, &x605);
+ { uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x591, x593, &x608);
+ { uint64_t x611; uint8_t _ = _addcarryx_u64(0x0, x609, x594, &x611);
+ { uint64_t x614; uint8_t x615 = _addcarryx_u64(0x0, x559, x578, &x614);
+ { uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x562, x596, &x617);
+ { uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x565, x599, &x620);
+ { uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x568, x602, &x623);
+ { uint64_t x626; uint8_t x627 = _addcarryx_u64(x624, x571, x605, &x626);
+ { uint64_t x629; uint8_t x630 = _addcarryx_u64(x627, x574, x608, &x629);
+ { uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x576, x611, &x632);
+ { uint64_t x636; uint64_t x635 = _mulx_u64(x614, 0xffffffffffffffffL, &x636);
+ { uint64_t x639; uint64_t x638 = _mulx_u64(x614, 0xffffffffffffffffL, &x639);
+ { uint64_t x642; uint64_t x641 = _mulx_u64(x614, 0xffffffffffffffffL, &x642);
+ { uint64_t x645; uint64_t x644 = _mulx_u64(x614, 0xffffffffffffffffL, &x645);
+ { uint64_t x648; uint64_t x647 = _mulx_u64(x614, 0xffffffffffffffffL, &x648);
+ { uint64_t x651; uint64_t x650 = _mulx_u64(x614, 0xb0ffffffffffffffL, &x651);
+ { uint64_t x653; uint8_t x654 = _addcarryx_u64(0x0, x636, x638, &x653);
+ { uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x639, x641, &x656);
+ { uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x642, x644, &x659);
+ { uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x645, x647, &x662);
+ { uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x648, x650, &x665);
+ { uint64_t x668; uint8_t _ = _addcarryx_u64(0x0, x666, x651, &x668);
+ { uint64_t _; uint8_t x672 = _addcarryx_u64(0x0, x614, x635, &_);
+ { uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x617, x653, &x674);
+ { uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x620, x656, &x677);
+ { uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x623, x659, &x680);
+ { uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x626, x662, &x683);
+ { uint64_t x686; uint8_t x687 = _addcarryx_u64(x684, x629, x665, &x686);
+ { uint64_t x689; uint8_t x690 = _addcarryx_u64(x687, x632, x668, &x689);
+ { uint8_t x691 = (x690 + x633);
+ { uint64_t x693; uint8_t x694 = _subborrow_u64(0x0, x674, 0xffffffffffffffffL, &x693);
+ { uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0xffffffffffffffffL, &x696);
+ { uint64_t x699; uint8_t x700 = _subborrow_u64(x697, x680, 0xffffffffffffffffL, &x699);
+ { uint64_t x702; uint8_t x703 = _subborrow_u64(x700, x683, 0xffffffffffffffffL, &x702);
+ { uint64_t x705; uint8_t x706 = _subborrow_u64(x703, x686, 0xffffffffffffffffL, &x705);
+ { uint64_t x708; uint8_t x709 = _subborrow_u64(x706, x689, 0xb0ffffffffffffffL, &x708);
+ { uint64_t _; uint8_t x712 = _subborrow_u64(x709, x691, 0x0, &_);
+ { uint64_t x713 = cmovznz(x712, x708, x689);
+ { uint64_t x714 = cmovznz(x712, x705, x686);
+ { uint64_t x715 = cmovznz(x712, x702, x683);
+ { uint64_t x716 = cmovznz(x712, x699, x680);
+ { uint64_t x717 = cmovznz(x712, x696, x677);
+ { uint64_t x718 = cmovznz(x712, x693, x674);
+ out[0] = x718;
+ out[1] = x717;
+ out[2] = x716;
+ out[3] = x715;
+ out[4] = x714;
+ out[5] = x713;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c b/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c
index ad3763a14..c93f74257 100644
--- a/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c
@@ -1,27 +1,15 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (x10 | x9);
-{ uint64_t x12 = (x8 | x11);
-{ uint64_t x13 = (x6 | x12);
-{ uint64_t x14 = (x4 | x13);
-{ uint64_t x15 = (x2 | x14);
-out[0] = x15;
-}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x11 = (x10 | x9);
+ { uint64_t x12 = (x8 | x11);
+ { uint64_t x13 = (x6 | x12);
+ { uint64_t x14 = (x4 | x13);
+ { uint64_t x15 = (x2 | x14);
+ out[0] = x15;
+ }}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c b/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c
index eb7d95bb5..b09e3f333 100644
--- a/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c
@@ -1,46 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feopp.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
-{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
-{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
-{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
-{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
-{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
-{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
-{ uint64_t x30 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
-{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
-{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
-{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
-{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
-{ uint64_t x50 = (x29 & 0xb0ffffffffffffffL);
-{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
-out[0] = x52;
-out[1] = x48;
-out[2] = x44;
-out[3] = x40;
-out[4] = x36;
-out[5] = x32;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void feopp(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);
+ { uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+ { uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+ { uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+ { uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+ { uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+ { uint64_t x34 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+ { uint64_t x38 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+ { uint64_t x42 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+ { uint64_t x46 = (x29 & 0xffffffffffffffffL);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+ { uint64_t x50 = (x29 & 0xb0ffffffffffffffL);
+ { uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c b/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c
index 5946b461b..c26dcd466 100644
--- a/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c
@@ -1,46 +1,40 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesub.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
-{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
-{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
-{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
-{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
-{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
-{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
-{ uint64_t x43 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
-{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
-{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
-{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
-{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
-{ uint64_t x63 = (x42 & 0xb0ffffffffffffffL);
-{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
-out[0] = x65;
-out[1] = x61;
-out[2] = x57;
-out[3] = x53;
-out[4] = x49;
-out[5] = x45;
-}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesub(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);
+ { uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+ { uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+ { uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+ { uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+ { uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+ { uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);
+ { uint64_t x43 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+ { uint64_t x47 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+ { uint64_t x51 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+ { uint64_t x55 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+ { uint64_t x59 = (x42 & 0xffffffffffffffffL);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+ { uint64_t x63 = (x42 & 0xb0ffffffffffffffL);
+ { uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);
+ out[0] = x45;
+ out[1] = x49;
+ out[2] = x53;
+ out[3] = x57;
+ out[4] = x61;
+ out[5] = x65;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e389m21/feadd.c b/src/Specific/montgomery64_2e389m21/feadd.c
index dacadb407..23289045f 100644
--- a/src/Specific/montgomery64_2e389m21/feadd.c
+++ b/src/Specific/montgomery64_2e389m21/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffebL, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1f, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffebL, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1f, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e389m21/fenz.c b/src/Specific/montgomery64_2e389m21/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e389m21/fenz.c
+++ b/src/Specific/montgomery64_2e389m21/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e401m31/feadd.c b/src/Specific/montgomery64_2e401m31/feadd.c
index d6917e255..aa164bbcb 100644
--- a/src/Specific/montgomery64_2e401m31/feadd.c
+++ b/src/Specific/montgomery64_2e401m31/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffe1L, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1ffff, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffe1L, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1ffff, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e401m31/fenz.c b/src/Specific/montgomery64_2e401m31/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e401m31/fenz.c
+++ b/src/Specific/montgomery64_2e401m31/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e413m21/feadd.c b/src/Specific/montgomery64_2e413m21/feadd.c
index 50ddce7b2..04bef7398 100644
--- a/src/Specific/montgomery64_2e413m21/feadd.c
+++ b/src/Specific/montgomery64_2e413m21/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffebL, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1fffffff, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffebL, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1fffffff, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e413m21/fenz.c b/src/Specific/montgomery64_2e413m21/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e413m21/fenz.c
+++ b/src/Specific/montgomery64_2e413m21/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e414m17/feadd.c b/src/Specific/montgomery64_2e414m17/feadd.c
index 2687b8408..db9bba1e3 100644
--- a/src/Specific/montgomery64_2e414m17/feadd.c
+++ b/src/Specific/montgomery64_2e414m17/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffefL, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x3fffffff, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffefL, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x3fffffff, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e414m17/fenz.c b/src/Specific/montgomery64_2e414m17/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e414m17/fenz.c
+++ b/src/Specific/montgomery64_2e414m17/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e416m2e208m1/feadd.c b/src/Specific/montgomery64_2e416m2e208m1/feadd.c
index aff57e2db..5879fad2e 100644
--- a/src/Specific/montgomery64_2e416m2e208m1/feadd.c
+++ b/src/Specific/montgomery64_2e416m2e208m1/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffffL, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xfffffffffffeffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xffffffff, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffffL, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xfffffffffffeffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xffffffff, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e416m2e208m1/fenz.c b/src/Specific/montgomery64_2e416m2e208m1/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e416m2e208m1/fenz.c
+++ b/src/Specific/montgomery64_2e416m2e208m1/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e444m17/feadd.c b/src/Specific/montgomery64_2e444m17/feadd.c
index 264e08caa..f3512f48f 100644
--- a/src/Specific/montgomery64_2e444m17/feadd.c
+++ b/src/Specific/montgomery64_2e444m17/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffefL, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xfffffffffffffff, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffefL, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xfffffffffffffff, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e444m17/fenz.c b/src/Specific/montgomery64_2e444m17/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e444m17/fenz.c
+++ b/src/Specific/montgomery64_2e444m17/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e448m2e224m1/feadd.c b/src/Specific/montgomery64_2e448m2e224m1/feadd.c
index 8b9e9f114..533db1963 100644
--- a/src/Specific/montgomery64_2e448m2e224m1/feadd.c
+++ b/src/Specific/montgomery64_2e448m2e224m1/feadd.c
@@ -1,50 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
-{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
-{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
-{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
-{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
-{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
-{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffffL, &x50);
-{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
-{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
-{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xfffffffeffffffffL, &x59);
-{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
-{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
-{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xffffffffffffffffL, &x68);
-{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
-{ uint64_t x73 = cmovznz(x72, x68, x47);
-{ uint64_t x74 = cmovznz(x72, x65, x44);
-{ uint64_t x75 = cmovznz(x72, x62, x41);
-{ uint64_t x76 = cmovznz(x72, x59, x38);
-{ uint64_t x77 = cmovznz(x72, x56, x35);
-{ uint64_t x78 = cmovznz(x72, x53, x32);
-{ uint64_t x79 = cmovznz(x72, x50, x29);
-out[0] = x73;
-out[1] = x74;
-out[2] = x75;
-out[3] = x76;
-out[4] = x77;
-out[5] = x78;
-out[6] = x79;
-}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void feadd(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29);
+ { uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+ { uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+ { uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+ { uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47);
+ { uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffffL, &x50);
+ { uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+ { uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+ { uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xfffffffeffffffffL, &x59);
+ { uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+ { uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+ { uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xffffffffffffffffL, &x68);
+ { uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_);
+ { uint64_t x73 = cmovznz(x72, x68, x47);
+ { uint64_t x74 = cmovznz(x72, x65, x44);
+ { uint64_t x75 = cmovznz(x72, x62, x41);
+ { uint64_t x76 = cmovznz(x72, x59, x38);
+ { uint64_t x77 = cmovznz(x72, x56, x35);
+ { uint64_t x78 = cmovznz(x72, x53, x32);
+ { uint64_t x79 = cmovznz(x72, x50, x29);
+ out[0] = x79;
+ out[1] = x78;
+ out[2] = x77;
+ out[3] = x76;
+ out[4] = x75;
+ out[5] = x74;
+ out[6] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e448m2e224m1/fenz.c b/src/Specific/montgomery64_2e448m2e224m1/fenz.c
index 428b446d0..d09ab7ff5 100644
--- a/src/Specific/montgomery64_2e448m2e224m1/fenz.c
+++ b/src/Specific/montgomery64_2e448m2e224m1/fenz.c
@@ -1,28 +1,17 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (x12 | x11);
-{ uint64_t x14 = (x10 | x13);
-{ uint64_t x15 = (x8 | x14);
-{ uint64_t x16 = (x6 | x15);
-{ uint64_t x17 = (x4 | x16);
-{ uint64_t x18 = (x2 | x17);
-out[0] = x18;
-}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x13 = (x12 | x11);
+ { uint64_t x14 = (x10 | x13);
+ { uint64_t x15 = (x8 | x14);
+ { uint64_t x16 = (x6 | x15);
+ { uint64_t x17 = (x4 | x16);
+ { uint64_t x18 = (x2 | x17);
+ out[0] = x18;
+ }}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e450m2e225m1/feadd.c b/src/Specific/montgomery64_2e450m2e225m1/feadd.c
index 2e0e920dc..6a47c0239 100644
--- a/src/Specific/montgomery64_2e450m2e225m1/feadd.c
+++ b/src/Specific/montgomery64_2e450m2e225m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xfffffffdffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x3, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xfffffffdffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x3, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e450m2e225m1/fenz.c b/src/Specific/montgomery64_2e450m2e225m1/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e450m2e225m1/fenz.c
+++ b/src/Specific/montgomery64_2e450m2e225m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e452m3/feadd.c b/src/Specific/montgomery64_2e452m3/feadd.c
index d9febb228..a6c7b9cf8 100644
--- a/src/Specific/montgomery64_2e452m3/feadd.c
+++ b/src/Specific/montgomery64_2e452m3/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffffdL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xf, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffffdL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xf, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e452m3/fenz.c b/src/Specific/montgomery64_2e452m3/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e452m3/fenz.c
+++ b/src/Specific/montgomery64_2e452m3/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e468m17/feadd.c b/src/Specific/montgomery64_2e468m17/feadd.c
index 4f46ae1b3..2019e4f52 100644
--- a/src/Specific/montgomery64_2e468m17/feadd.c
+++ b/src/Specific/montgomery64_2e468m17/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffefL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xfffff, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffefL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xfffff, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e468m17/fenz.c b/src/Specific/montgomery64_2e468m17/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e468m17/fenz.c
+++ b/src/Specific/montgomery64_2e468m17/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e480m2e240m1/feadd.c b/src/Specific/montgomery64_2e480m2e240m1/feadd.c
index a8bb326bd..c957fc638 100644
--- a/src/Specific/montgomery64_2e480m2e240m1/feadd.c
+++ b/src/Specific/montgomery64_2e480m2e240m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xfffeffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffff, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xfffeffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffff, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e480m2e240m1/fenz.c b/src/Specific/montgomery64_2e480m2e240m1/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e480m2e240m1/fenz.c
+++ b/src/Specific/montgomery64_2e480m2e240m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e488m17/feadd.c b/src/Specific/montgomery64_2e488m17/feadd.c
index 2c13a513b..1e923a46c 100644
--- a/src/Specific/montgomery64_2e488m17/feadd.c
+++ b/src/Specific/montgomery64_2e488m17/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffefL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffffff, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffefL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffffff, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e488m17/fenz.c b/src/Specific/montgomery64_2e488m17/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e488m17/fenz.c
+++ b/src/Specific/montgomery64_2e488m17/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e489m21/feadd.c b/src/Specific/montgomery64_2e489m21/feadd.c
index af6a0c4ec..75c56a2e8 100644
--- a/src/Specific/montgomery64_2e489m21/feadd.c
+++ b/src/Specific/montgomery64_2e489m21/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffebL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x1ffffffffff, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffebL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x1ffffffffff, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e489m21/fenz.c b/src/Specific/montgomery64_2e489m21/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e489m21/fenz.c
+++ b/src/Specific/montgomery64_2e489m21/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e495m31/feadd.c b/src/Specific/montgomery64_2e495m31/feadd.c
index 25392f503..4e490612f 100644
--- a/src/Specific/montgomery64_2e495m31/feadd.c
+++ b/src/Specific/montgomery64_2e495m31/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffe1L, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffff, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffe1L, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffff, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e495m31/fenz.c b/src/Specific/montgomery64_2e495m31/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e495m31/fenz.c
+++ b/src/Specific/montgomery64_2e495m31/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c b/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c
index 284a47d81..77643d534 100644
--- a/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c
+++ b/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x3eddffffffffffff, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x3eddffffffffffff, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c b/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c
+++ b/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e511m187/feadd.c b/src/Specific/montgomery64_2e511m187/feadd.c
index 77d131494..bcd426285 100644
--- a/src/Specific/montgomery64_2e511m187/feadd.c
+++ b/src/Specific/montgomery64_2e511m187/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffff45L, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffffffffL, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffff45L, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffffffffL, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e511m187/fenz.c b/src/Specific/montgomery64_2e511m187/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e511m187/fenz.c
+++ b/src/Specific/montgomery64_2e511m187/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e511m481/feadd.c b/src/Specific/montgomery64_2e511m481/feadd.c
index b26d4482c..09df91754 100644
--- a/src/Specific/montgomery64_2e511m481/feadd.c
+++ b/src/Specific/montgomery64_2e511m481/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffe1fL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffffffffL, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffe1fL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffffffffL, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e511m481/fenz.c b/src/Specific/montgomery64_2e511m481/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e511m481/fenz.c
+++ b/src/Specific/montgomery64_2e511m481/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c b/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c
index 4421e6ab5..3f10ced2e 100644
--- a/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c
+++ b/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xfe14ffffffffffffL, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xfe14ffffffffffffL, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c b/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c
+++ b/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e512m569/feadd.c b/src/Specific/montgomery64_2e512m569/feadd.c
index d6e5329b2..3dd38b658 100644
--- a/src/Specific/montgomery64_2e512m569/feadd.c
+++ b/src/Specific/montgomery64_2e512m569/feadd.c
@@ -1,54 +1,52 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
-{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
-{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
-{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
-{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
-{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
-{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
-{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffdc7L, &x57);
-{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
-{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
-{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
-{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
-{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
-{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
-{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffffffffffffL, &x78);
-{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
-{ uint64_t x83 = cmovznz(x82, x78, x54);
-{ uint64_t x84 = cmovznz(x82, x75, x51);
-{ uint64_t x85 = cmovznz(x82, x72, x48);
-{ uint64_t x86 = cmovznz(x82, x69, x45);
-{ uint64_t x87 = cmovznz(x82, x66, x42);
-{ uint64_t x88 = cmovznz(x82, x63, x39);
-{ uint64_t x89 = cmovznz(x82, x60, x36);
-{ uint64_t x90 = cmovznz(x82, x57, x33);
-out[0] = x83;
-out[1] = x84;
-out[2] = x85;
-out[3] = x86;
-out[4] = x87;
-out[5] = x88;
-out[6] = x89;
-out[7] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void feadd(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+ { uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+ { uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+ { uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+ { uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+ { uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+ { uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffdc7L, &x57);
+ { uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+ { uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+ { uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+ { uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+ { uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+ { uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+ { uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffffffffffffL, &x78);
+ { uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+ { uint64_t x83 = cmovznz(x82, x78, x54);
+ { uint64_t x84 = cmovznz(x82, x75, x51);
+ { uint64_t x85 = cmovznz(x82, x72, x48);
+ { uint64_t x86 = cmovznz(x82, x69, x45);
+ { uint64_t x87 = cmovznz(x82, x66, x42);
+ { uint64_t x88 = cmovznz(x82, x63, x39);
+ { uint64_t x89 = cmovznz(x82, x60, x36);
+ { uint64_t x90 = cmovznz(x82, x57, x33);
+ out[0] = x90;
+ out[1] = x89;
+ out[2] = x88;
+ out[3] = x87;
+ out[4] = x86;
+ out[5] = x85;
+ out[6] = x84;
+ out[7] = x83;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e512m569/fenz.c b/src/Specific/montgomery64_2e512m569/fenz.c
index 72878ca19..a07b0df9c 100644
--- a/src/Specific/montgomery64_2e512m569/fenz.c
+++ b/src/Specific/montgomery64_2e512m569/fenz.c
@@ -1,29 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (x14 | x13);
-{ uint64_t x16 = (x12 | x15);
-{ uint64_t x17 = (x10 | x16);
-{ uint64_t x18 = (x8 | x17);
-{ uint64_t x19 = (x6 | x18);
-{ uint64_t x20 = (x4 | x19);
-{ uint64_t x21 = (x2 | x20);
-out[0] = x21;
-}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x15 = (x14 | x13);
+ { uint64_t x16 = (x12 | x15);
+ { uint64_t x17 = (x10 | x16);
+ { uint64_t x18 = (x8 | x17);
+ { uint64_t x19 = (x6 | x18);
+ { uint64_t x20 = (x4 | x19);
+ { uint64_t x21 = (x2 | x20);
+ out[0] = x21;
+ }}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e521m1/feadd.c b/src/Specific/montgomery64_2e521m1/feadd.c
index 2a8646049..8dd56736b 100644
--- a/src/Specific/montgomery64_2e521m1/feadd.c
+++ b/src/Specific/montgomery64_2e521m1/feadd.c
@@ -1,58 +1,58 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "feadd.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x37; uint8_t x38 = _addcarryx_u64(0x0, x5, x21, &x37);
-{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x7, x23, &x40);
-{ uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x9, x25, &x43);
-{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x11, x27, &x46);
-{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x13, x29, &x49);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x15, x31, &x52);
-{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x17, x33, &x55);
-{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x19, x35, &x58);
-{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x18, x34, &x61);
-{ uint64_t x64; uint8_t x65 = _subborrow_u64(0x0, x37, 0xffffffffffffffffL, &x64);
-{ uint64_t x67; uint8_t x68 = _subborrow_u64(x65, x40, 0xffffffffffffffffL, &x67);
-{ uint64_t x70; uint8_t x71 = _subborrow_u64(x68, x43, 0xffffffffffffffffL, &x70);
-{ uint64_t x73; uint8_t x74 = _subborrow_u64(x71, x46, 0xffffffffffffffffL, &x73);
-{ uint64_t x76; uint8_t x77 = _subborrow_u64(x74, x49, 0xffffffffffffffffL, &x76);
-{ uint64_t x79; uint8_t x80 = _subborrow_u64(x77, x52, 0xffffffffffffffffL, &x79);
-{ uint64_t x82; uint8_t x83 = _subborrow_u64(x80, x55, 0xffffffffffffffffL, &x82);
-{ uint64_t x85; uint8_t x86 = _subborrow_u64(x83, x58, 0xffffffffffffffffL, &x85);
-{ uint64_t x88; uint8_t x89 = _subborrow_u64(x86, x61, 0x1ff, &x88);
-{ uint64_t _; uint8_t x92 = _subborrow_u64(x89, x62, 0x0, &_);
-{ uint64_t x93 = cmovznz(x92, x88, x61);
-{ uint64_t x94 = cmovznz(x92, x85, x58);
-{ uint64_t x95 = cmovznz(x92, x82, x55);
-{ uint64_t x96 = cmovznz(x92, x79, x52);
-{ uint64_t x97 = cmovznz(x92, x76, x49);
-{ uint64_t x98 = cmovznz(x92, x73, x46);
-{ uint64_t x99 = cmovznz(x92, x70, x43);
-{ uint64_t x100 = cmovznz(x92, x67, x40);
-{ uint64_t x101 = cmovznz(x92, x64, x37);
-out[0] = x93;
-out[1] = x94;
-out[2] = x95;
-out[3] = x96;
-out[4] = x97;
-out[5] = x98;
-out[6] = x99;
-out[7] = x100;
-out[8] = x101;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void feadd(uint64_t out[9], const uint64_t in1[9], const uint64_t in2[9]) {
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x34 = in2[8];
+ { const uint64_t x35 = in2[7];
+ { const uint64_t x33 = in2[6];
+ { const uint64_t x31 = in2[5];
+ { const uint64_t x29 = in2[4];
+ { const uint64_t x27 = in2[3];
+ { const uint64_t x25 = in2[2];
+ { const uint64_t x23 = in2[1];
+ { const uint64_t x21 = in2[0];
+ { uint64_t x37; uint8_t x38 = _addcarryx_u64(0x0, x5, x21, &x37);
+ { uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x7, x23, &x40);
+ { uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x9, x25, &x43);
+ { uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x11, x27, &x46);
+ { uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x13, x29, &x49);
+ { uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x15, x31, &x52);
+ { uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x17, x33, &x55);
+ { uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x19, x35, &x58);
+ { uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x18, x34, &x61);
+ { uint64_t x64; uint8_t x65 = _subborrow_u64(0x0, x37, 0xffffffffffffffffL, &x64);
+ { uint64_t x67; uint8_t x68 = _subborrow_u64(x65, x40, 0xffffffffffffffffL, &x67);
+ { uint64_t x70; uint8_t x71 = _subborrow_u64(x68, x43, 0xffffffffffffffffL, &x70);
+ { uint64_t x73; uint8_t x74 = _subborrow_u64(x71, x46, 0xffffffffffffffffL, &x73);
+ { uint64_t x76; uint8_t x77 = _subborrow_u64(x74, x49, 0xffffffffffffffffL, &x76);
+ { uint64_t x79; uint8_t x80 = _subborrow_u64(x77, x52, 0xffffffffffffffffL, &x79);
+ { uint64_t x82; uint8_t x83 = _subborrow_u64(x80, x55, 0xffffffffffffffffL, &x82);
+ { uint64_t x85; uint8_t x86 = _subborrow_u64(x83, x58, 0xffffffffffffffffL, &x85);
+ { uint64_t x88; uint8_t x89 = _subborrow_u64(x86, x61, 0x1ff, &x88);
+ { uint64_t _; uint8_t x92 = _subborrow_u64(x89, x62, 0x0, &_);
+ { uint64_t x93 = cmovznz(x92, x88, x61);
+ { uint64_t x94 = cmovznz(x92, x85, x58);
+ { uint64_t x95 = cmovznz(x92, x82, x55);
+ { uint64_t x96 = cmovznz(x92, x79, x52);
+ { uint64_t x97 = cmovznz(x92, x76, x49);
+ { uint64_t x98 = cmovznz(x92, x73, x46);
+ { uint64_t x99 = cmovznz(x92, x70, x43);
+ { uint64_t x100 = cmovznz(x92, x67, x40);
+ { uint64_t x101 = cmovznz(x92, x64, x37);
+ out[0] = x101;
+ out[1] = x100;
+ out[2] = x99;
+ out[3] = x98;
+ out[4] = x97;
+ out[5] = x96;
+ out[6] = x95;
+ out[7] = x94;
+ out[8] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/montgomery64_2e521m1/fenz.c b/src/Specific/montgomery64_2e521m1/fenz.c
index d49e22dda..eb4cd2856 100644
--- a/src/Specific/montgomery64_2e521m1/fenz.c
+++ b/src/Specific/montgomery64_2e521m1/fenz.c
@@ -1,30 +1,21 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fenz.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (x16 | x15);
-{ uint64_t x18 = (x14 | x17);
-{ uint64_t x19 = (x12 | x18);
-{ uint64_t x20 = (x10 | x19);
-{ uint64_t x21 = (x8 | x20);
-{ uint64_t x22 = (x6 | x21);
-{ uint64_t x23 = (x4 | x22);
-{ uint64_t x24 = (x2 | x23);
-out[0] = x24;
-}}}}}}}}
-// caller: uint64_t out[1];
+static void fenz(ReturnType uint64_t out[1], const uint64_t in1[9]) {
+ { const uint64_t x15 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x17 = (x16 | x15);
+ { uint64_t x18 = (x14 | x17);
+ { uint64_t x19 = (x12 | x18);
+ { uint64_t x20 = (x10 | x19);
+ { uint64_t x21 = (x8 | x20);
+ { uint64_t x22 = (x6 | x21);
+ { uint64_t x23 = (x4 | x22);
+ { uint64_t x24 = (x2 | x23);
+ out[0] = x24;
+ }}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e127m1/femul.c b/src/Specific/solinas32_2e127m1/femul.c
index 8f793432e..f51c6ba17 100644
--- a/src/Specific/solinas32_2e127m1/femul.c
+++ b/src/Specific/solinas32_2e127m1/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((0x2 * ((uint64_t)x11 * x19)) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((0x2 * ((uint64_t)x9 * x19)) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + ((uint64_t)x12 * x22));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23)));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19)))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17)))))));
-{ uint32_t x30 = (uint32_t) (x29 >> 0x16);
-{ uint32_t x31 = ((uint32_t)x29 & 0x3fffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint32_t x33 = (uint32_t) (x32 >> 0x15);
-{ uint32_t x34 = ((uint32_t)x32 & 0x1fffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint32_t x36 = (uint32_t) (x35 >> 0x15);
-{ uint32_t x37 = ((uint32_t)x35 & 0x1fffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint32_t x39 = (uint32_t) (x38 >> 0x15);
-{ uint32_t x40 = ((uint32_t)x38 & 0x1fffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint32_t x42 = (uint32_t) (x41 >> 0x15);
-{ uint32_t x43 = ((uint32_t)x41 & 0x1fffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint32_t x45 = (uint32_t) (x44 >> 0x15);
-{ uint32_t x46 = ((uint32_t)x44 & 0x1fffff);
-{ uint32_t x47 = (x31 + x45);
-{ uint32_t x48 = (x47 >> 0x16);
-{ uint32_t x49 = (x47 & 0x3fffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x15);
-{ uint32_t x52 = (x50 & 0x1fffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((0x2 * ((uint64_t)x11 * x19)) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((0x2 * ((uint64_t)x9 * x19)) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + ((uint64_t)x12 * x22));
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23)));
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19)))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17)))))));
+ { uint32_t x30 = (uint32_t) (x29 >> 0x16);
+ { uint32_t x31 = ((uint32_t)x29 & 0x3fffff);
+ { uint64_t x32 = (x30 + x28);
+ { uint32_t x33 = (uint32_t) (x32 >> 0x15);
+ { uint32_t x34 = ((uint32_t)x32 & 0x1fffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint32_t x36 = (uint32_t) (x35 >> 0x15);
+ { uint32_t x37 = ((uint32_t)x35 & 0x1fffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint32_t x39 = (uint32_t) (x38 >> 0x15);
+ { uint32_t x40 = ((uint32_t)x38 & 0x1fffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint32_t x42 = (uint32_t) (x41 >> 0x15);
+ { uint32_t x43 = ((uint32_t)x41 & 0x1fffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint32_t x45 = (uint32_t) (x44 >> 0x15);
+ { uint32_t x46 = ((uint32_t)x44 & 0x1fffff);
+ { uint32_t x47 = (x31 + x45);
+ { uint32_t x48 = (x47 >> 0x16);
+ { uint32_t x49 = (x47 & 0x3fffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x15);
+ { uint32_t x52 = (x50 & 0x1fffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e127m1/fesquare.c b/src/Specific/solinas32_2e127m1/fesquare.c
index 8657902e8..517b5ec31 100644
--- a/src/Specific/solinas32_2e127m1/fesquare.c
+++ b/src/Specific/solinas32_2e127m1/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + ((uint64_t)x9 * x9));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10)));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6)))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4)))))));
-{ uint32_t x17 = (uint32_t) (x16 >> 0x16);
-{ uint32_t x18 = ((uint32_t)x16 & 0x3fffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint32_t x20 = (uint32_t) (x19 >> 0x15);
-{ uint32_t x21 = ((uint32_t)x19 & 0x1fffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint32_t x23 = (uint32_t) (x22 >> 0x15);
-{ uint32_t x24 = ((uint32_t)x22 & 0x1fffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint32_t x26 = (uint32_t) (x25 >> 0x15);
-{ uint32_t x27 = ((uint32_t)x25 & 0x1fffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x15);
-{ uint32_t x30 = ((uint32_t)x28 & 0x1fffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x15);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1fffff);
-{ uint32_t x34 = (x18 + x32);
-{ uint32_t x35 = (x34 >> 0x16);
-{ uint32_t x36 = (x34 & 0x3fffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x15);
-{ uint32_t x39 = (x37 & 0x1fffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + ((uint64_t)x9 * x9));
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10)));
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6)))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4)))))));
+ { uint32_t x17 = (uint32_t) (x16 >> 0x16);
+ { uint32_t x18 = ((uint32_t)x16 & 0x3fffff);
+ { uint64_t x19 = (x17 + x15);
+ { uint32_t x20 = (uint32_t) (x19 >> 0x15);
+ { uint32_t x21 = ((uint32_t)x19 & 0x1fffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint32_t x23 = (uint32_t) (x22 >> 0x15);
+ { uint32_t x24 = ((uint32_t)x22 & 0x1fffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint32_t x26 = (uint32_t) (x25 >> 0x15);
+ { uint32_t x27 = ((uint32_t)x25 & 0x1fffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x15);
+ { uint32_t x30 = ((uint32_t)x28 & 0x1fffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x15);
+ { uint32_t x33 = ((uint32_t)x31 & 0x1fffff);
+ { uint32_t x34 = (x18 + x32);
+ { uint32_t x35 = (x34 >> 0x16);
+ { uint32_t x36 = (x34 & 0x3fffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x15);
+ { uint32_t x39 = (x37 & 0x1fffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e127m1/freeze.c b/src/Specific/solinas32_2e127m1/freeze.c
index d8b2cd358..d99f9ec14 100644
--- a/src/Specific/solinas32_2e127m1/freeze.c
+++ b/src/Specific/solinas32_2e127m1/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffff;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffff);
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x1fffff);
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x1fffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x1fffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x1fffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x1fffff);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0x3fffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint32_t x34 = (x29 & 0x1fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x1fffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x1fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x1fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x1fffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e129m25/femul.c b/src/Specific/solinas32_2e129m25/femul.c
index 03718ede2..9ed23c4c2 100644
--- a/src/Specific/solinas32_2e129m25/femul.c
+++ b/src/Specific/solinas32_2e129m25/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + (((uint64_t)x9 * x19) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + (0x19 * (0x2 * ((uint64_t)x12 * x22))));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x19 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x19 * ((0x2 * ((uint64_t)x11 * x22)) + (((uint64_t)x13 * x23) + (0x2 * ((uint64_t)x12 * x21))))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x19 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x19 * ((0x2 * ((uint64_t)x7 * x22)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + (((uint64_t)x13 * x19) + (0x2 * ((uint64_t)x12 * x17))))))));
-{ uint32_t x30 = (uint32_t) (x29 >> 0x16);
-{ uint32_t x31 = ((uint32_t)x29 & 0x3fffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint64_t x33 = (x32 >> 0x15);
-{ uint32_t x34 = ((uint32_t)x32 & 0x1fffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint32_t x36 = (uint32_t) (x35 >> 0x16);
-{ uint32_t x37 = ((uint32_t)x35 & 0x3fffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint32_t x39 = (uint32_t) (x38 >> 0x15);
-{ uint32_t x40 = ((uint32_t)x38 & 0x1fffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint32_t x42 = (uint32_t) (x41 >> 0x16);
-{ uint32_t x43 = ((uint32_t)x41 & 0x3fffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint32_t x45 = (uint32_t) (x44 >> 0x15);
-{ uint32_t x46 = ((uint32_t)x44 & 0x1fffff);
-{ uint64_t x47 = (x31 + ((uint64_t)0x19 * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x16);
-{ uint32_t x49 = ((uint32_t)x47 & 0x3fffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x15);
-{ uint32_t x52 = (x50 & 0x1fffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + (((uint64_t)x9 * x19) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + (0x19 * (0x2 * ((uint64_t)x12 * x22))));
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x19 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x19 * ((0x2 * ((uint64_t)x11 * x22)) + (((uint64_t)x13 * x23) + (0x2 * ((uint64_t)x12 * x21))))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x19 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + (0x19 * ((0x2 * ((uint64_t)x7 * x22)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + (((uint64_t)x13 * x19) + (0x2 * ((uint64_t)x12 * x17))))))));
+ { uint32_t x30 = (uint32_t) (x29 >> 0x16);
+ { uint32_t x31 = ((uint32_t)x29 & 0x3fffff);
+ { uint64_t x32 = (x30 + x28);
+ { uint64_t x33 = (x32 >> 0x15);
+ { uint32_t x34 = ((uint32_t)x32 & 0x1fffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint32_t x36 = (uint32_t) (x35 >> 0x16);
+ { uint32_t x37 = ((uint32_t)x35 & 0x3fffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint32_t x39 = (uint32_t) (x38 >> 0x15);
+ { uint32_t x40 = ((uint32_t)x38 & 0x1fffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint32_t x42 = (uint32_t) (x41 >> 0x16);
+ { uint32_t x43 = ((uint32_t)x41 & 0x3fffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint32_t x45 = (uint32_t) (x44 >> 0x15);
+ { uint32_t x46 = ((uint32_t)x44 & 0x1fffff);
+ { uint64_t x47 = (x31 + ((uint64_t)0x19 * x45));
+ { uint32_t x48 = (uint32_t) (x47 >> 0x16);
+ { uint32_t x49 = ((uint32_t)x47 & 0x3fffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x15);
+ { uint32_t x52 = (x50 & 0x1fffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e129m25/fesquare.c b/src/Specific/solinas32_2e129m25/fesquare.c
index a54caf2f1..efa4f8e92 100644
--- a/src/Specific/solinas32_2e129m25/fesquare.c
+++ b/src/Specific/solinas32_2e129m25/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (0x2 * ((uint64_t)x9 * x9))));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * ((0x2 * ((uint64_t)x8 * x9)) + (((uint64_t)x10 * x10) + (0x2 * ((uint64_t)x9 * x8))))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x9)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + (0x2 * ((uint64_t)x9 * x4))))))));
-{ uint32_t x17 = (uint32_t) (x16 >> 0x16);
-{ uint32_t x18 = ((uint32_t)x16 & 0x3fffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint64_t x20 = (x19 >> 0x15);
-{ uint32_t x21 = ((uint32_t)x19 & 0x1fffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint32_t x23 = (uint32_t) (x22 >> 0x16);
-{ uint32_t x24 = ((uint32_t)x22 & 0x3fffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint32_t x26 = (uint32_t) (x25 >> 0x15);
-{ uint32_t x27 = ((uint32_t)x25 & 0x1fffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x16);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3fffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x15);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1fffff);
-{ uint64_t x34 = (x18 + ((uint64_t)0x19 * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x15);
-{ uint32_t x39 = (x37 & 0x1fffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (0x2 * ((uint64_t)x9 * x9))));
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * ((0x2 * ((uint64_t)x8 * x9)) + (((uint64_t)x10 * x10) + (0x2 * ((uint64_t)x9 * x8))))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x9)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + (0x2 * ((uint64_t)x9 * x4))))))));
+ { uint32_t x17 = (uint32_t) (x16 >> 0x16);
+ { uint32_t x18 = ((uint32_t)x16 & 0x3fffff);
+ { uint64_t x19 = (x17 + x15);
+ { uint64_t x20 = (x19 >> 0x15);
+ { uint32_t x21 = ((uint32_t)x19 & 0x1fffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint32_t x23 = (uint32_t) (x22 >> 0x16);
+ { uint32_t x24 = ((uint32_t)x22 & 0x3fffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint32_t x26 = (uint32_t) (x25 >> 0x15);
+ { uint32_t x27 = ((uint32_t)x25 & 0x1fffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x16);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3fffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x15);
+ { uint32_t x33 = ((uint32_t)x31 & 0x1fffff);
+ { uint64_t x34 = (x18 + ((uint64_t)0x19 * x32));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x16);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x15);
+ { uint32_t x39 = (x37 & 0x1fffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e129m25/freeze.c b/src/Specific/solinas32_2e129m25/freeze.c
index 386e29800..02e8390db 100644
--- a/src/Specific/solinas32_2e129m25/freeze.c
+++ b/src/Specific/solinas32_2e129m25/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffe7;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffe7);
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x1fffff);
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x3fffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x1fffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x3fffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x1fffff);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0x3fffe7);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint32_t x34 = (x29 & 0x1fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x3fffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x1fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x3fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x1fffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e130m5/femul.c b/src/Specific/solinas32_2e130m5/femul.c
index db0994a56..597a8eddd 100644
--- a/src/Specific/solinas32_2e130m5/femul.c
+++ b/src/Specific/solinas32_2e130m5/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint64_t x20 = (((uint64_t)x5 * x18) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + (((uint64_t)x11 * x15) + ((uint64_t)x10 * x13)))));
-{ uint64_t x21 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + (((uint64_t)x9 * x15) + ((uint64_t)x11 * x13)))) + (0x5 * ((uint64_t)x10 * x18)));
-{ uint64_t x22 = ((((uint64_t)x5 * x17) + (((uint64_t)x7 * x15) + ((uint64_t)x9 * x13))) + (0x5 * (((uint64_t)x11 * x18) + ((uint64_t)x10 * x19))));
-{ uint64_t x23 = ((((uint64_t)x5 * x15) + ((uint64_t)x7 * x13)) + (0x5 * (((uint64_t)x9 * x18) + (((uint64_t)x11 * x19) + ((uint64_t)x10 * x17)))));
-{ uint64_t x24 = (((uint64_t)x5 * x13) + (0x5 * (((uint64_t)x7 * x18) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x10 * x15))))));
-{ uint64_t x25 = (x24 >> 0x1a);
-{ uint32_t x26 = ((uint32_t)x24 & 0x3ffffff);
-{ uint64_t x27 = (x25 + x23);
-{ uint64_t x28 = (x27 >> 0x1a);
-{ uint32_t x29 = ((uint32_t)x27 & 0x3ffffff);
-{ uint64_t x30 = (x28 + x22);
-{ uint64_t x31 = (x30 >> 0x1a);
-{ uint32_t x32 = ((uint32_t)x30 & 0x3ffffff);
-{ uint64_t x33 = (x31 + x21);
-{ uint64_t x34 = (x33 >> 0x1a);
-{ uint32_t x35 = ((uint32_t)x33 & 0x3ffffff);
-{ uint64_t x36 = (x34 + x20);
-{ uint32_t x37 = (uint32_t) (x36 >> 0x1a);
-{ uint32_t x38 = ((uint32_t)x36 & 0x3ffffff);
-{ uint64_t x39 = (x26 + ((uint64_t)0x5 * x37));
-{ uint32_t x40 = (uint32_t) (x39 >> 0x1a);
-{ uint32_t x41 = ((uint32_t)x39 & 0x3ffffff);
-{ uint32_t x42 = (x40 + x29);
-{ uint32_t x43 = (x42 >> 0x1a);
-{ uint32_t x44 = (x42 & 0x3ffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) { /* Multiply the five-limb operands in1 and in2 into five output limbs. Limbs are cut at 26 bits (shift 0x1a, mask 0x3ffffff) and overflow past the top limb re-enters limb 0 scaled by 0x5; presumably arithmetic mod 2^130 - 5, judging by the path name -- confirm against the generator. */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { uint64_t x20 = (((uint64_t)x5 * x18) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + (((uint64_t)x11 * x15) + ((uint64_t)x10 * x13))))); /* column 4: every pair of limbs whose indices sum to 4 */
+ { uint64_t x21 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + (((uint64_t)x9 * x15) + ((uint64_t)x11 * x13)))) + (0x5 * ((uint64_t)x10 * x18))); /* column 3, plus the index-sum-8 pair wrapped around with factor 0x5 */
+ { uint64_t x22 = ((((uint64_t)x5 * x17) + (((uint64_t)x7 * x15) + ((uint64_t)x9 * x13))) + (0x5 * (((uint64_t)x11 * x18) + ((uint64_t)x10 * x19))));
+ { uint64_t x23 = ((((uint64_t)x5 * x15) + ((uint64_t)x7 * x13)) + (0x5 * (((uint64_t)x9 * x18) + (((uint64_t)x11 * x19) + ((uint64_t)x10 * x17)))));
+ { uint64_t x24 = (((uint64_t)x5 * x13) + (0x5 * (((uint64_t)x7 * x18) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x10 * x15)))))); /* column 0: in1[0]*in2[0] plus all index-sum-5 pairs times 0x5 */
+ { uint64_t x25 = (x24 >> 0x1a); /* carry chain, low column first: keep 26 bits, push the rest into the next column */
+ { uint32_t x26 = ((uint32_t)x24 & 0x3ffffff);
+ { uint64_t x27 = (x25 + x23);
+ { uint64_t x28 = (x27 >> 0x1a);
+ { uint32_t x29 = ((uint32_t)x27 & 0x3ffffff);
+ { uint64_t x30 = (x28 + x22);
+ { uint64_t x31 = (x30 >> 0x1a);
+ { uint32_t x32 = ((uint32_t)x30 & 0x3ffffff);
+ { uint64_t x33 = (x31 + x21);
+ { uint64_t x34 = (x33 >> 0x1a);
+ { uint32_t x35 = ((uint32_t)x33 & 0x3ffffff);
+ { uint64_t x36 = (x34 + x20);
+ { uint32_t x37 = (uint32_t) (x36 >> 0x1a); /* carry out of the top column */
+ { uint32_t x38 = ((uint32_t)x36 & 0x3ffffff);
+ { uint64_t x39 = (x26 + ((uint64_t)0x5 * x37)); /* top carry re-enters limb 0 scaled by 0x5 */
+ { uint32_t x40 = (uint32_t) (x39 >> 0x1a);
+ { uint32_t x41 = ((uint32_t)x39 & 0x3ffffff);
+ { uint32_t x42 = (x40 + x29); /* second pass: ripple the new limb-0 carry into limb 1 */
+ { uint32_t x43 = (x42 >> 0x1a);
+ { uint32_t x44 = (x42 & 0x3ffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32); /* limb 2 takes the last carry x43 without being masked again */
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e130m5/fesquare.c b/src/Specific/solinas32_2e130m5/fesquare.c
index d54648358..9fc0158e4 100644
--- a/src/Specific/solinas32_2e130m5/fesquare.c
+++ b/src/Specific/solinas32_2e130m5/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x9 = (((uint64_t)x2 * x7) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x7 * x2)))));
-{ uint64_t x10 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x5 * ((uint64_t)x7 * x7)));
-{ uint64_t x11 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x7) + ((uint64_t)x7 * x8))));
-{ uint64_t x12 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x7) + (((uint64_t)x8 * x8) + ((uint64_t)x7 * x6)))));
-{ uint64_t x13 = (((uint64_t)x2 * x2) + (0x5 * (((uint64_t)x4 * x7) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((uint64_t)x7 * x4))))));
-{ uint64_t x14 = (x13 >> 0x1a);
-{ uint32_t x15 = ((uint32_t)x13 & 0x3ffffff);
-{ uint64_t x16 = (x14 + x12);
-{ uint64_t x17 = (x16 >> 0x1a);
-{ uint32_t x18 = ((uint32_t)x16 & 0x3ffffff);
-{ uint64_t x19 = (x17 + x11);
-{ uint64_t x20 = (x19 >> 0x1a);
-{ uint32_t x21 = ((uint32_t)x19 & 0x3ffffff);
-{ uint64_t x22 = (x20 + x10);
-{ uint64_t x23 = (x22 >> 0x1a);
-{ uint32_t x24 = ((uint32_t)x22 & 0x3ffffff);
-{ uint64_t x25 = (x23 + x9);
-{ uint32_t x26 = (uint32_t) (x25 >> 0x1a);
-{ uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
-{ uint64_t x28 = (x15 + ((uint64_t)0x5 * x26));
-{ uint32_t x29 = (uint32_t) (x28 >> 0x1a);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
-{ uint32_t x31 = (x29 + x18);
-{ uint32_t x32 = (x31 >> 0x1a);
-{ uint32_t x33 = (x31 & 0x3ffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint32_t out[5], const uint32_t in1[5]) { /* Square the five-limb operand in1 into five output limbs, using 26-bit limb cuts (shift 0x1a, mask 0x3ffffff) and a 0x5 scale for terms that wrap past the top limb; presumably arithmetic mod 2^130 - 5, judging by the path name -- confirm against the generator. */
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x9 = (((uint64_t)x2 * x7) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x7 * x2))))); /* column 4; mirrored products such as x2*x7 and x7*x2 are both spelled out rather than doubled */
+ { uint64_t x10 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x5 * ((uint64_t)x7 * x7)));
+ { uint64_t x11 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x7) + ((uint64_t)x7 * x8))));
+ { uint64_t x12 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x7) + (((uint64_t)x8 * x8) + ((uint64_t)x7 * x6)))));
+ { uint64_t x13 = (((uint64_t)x2 * x2) + (0x5 * (((uint64_t)x4 * x7) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((uint64_t)x7 * x4)))))); /* column 0: in1[0] squared plus the index-sum-5 pairs times 0x5 */
+ { uint64_t x14 = (x13 >> 0x1a); /* carry chain, low column first: keep 26 bits, push the rest into the next column */
+ { uint32_t x15 = ((uint32_t)x13 & 0x3ffffff);
+ { uint64_t x16 = (x14 + x12);
+ { uint64_t x17 = (x16 >> 0x1a);
+ { uint32_t x18 = ((uint32_t)x16 & 0x3ffffff);
+ { uint64_t x19 = (x17 + x11);
+ { uint64_t x20 = (x19 >> 0x1a);
+ { uint32_t x21 = ((uint32_t)x19 & 0x3ffffff);
+ { uint64_t x22 = (x20 + x10);
+ { uint64_t x23 = (x22 >> 0x1a);
+ { uint32_t x24 = ((uint32_t)x22 & 0x3ffffff);
+ { uint64_t x25 = (x23 + x9);
+ { uint32_t x26 = (uint32_t) (x25 >> 0x1a); /* carry out of the top column */
+ { uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
+ { uint64_t x28 = (x15 + ((uint64_t)0x5 * x26)); /* top carry re-enters limb 0 scaled by 0x5 */
+ { uint32_t x29 = (uint32_t) (x28 >> 0x1a);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+ { uint32_t x31 = (x29 + x18); /* second pass: ripple the new limb-0 carry into limb 1 */
+ { uint32_t x32 = (x31 >> 0x1a);
+ { uint32_t x33 = (x31 & 0x3ffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21); /* limb 2 takes the last carry x32 without being masked again */
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e130m5/freeze.c b/src/Specific/solinas32_2e130m5/freeze.c
index 38ccd3bc8..5a39bcd67 100644
--- a/src/Specific/solinas32_2e130m5/freeze.c
+++ b/src/Specific/solinas32_2e130m5/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffb;;
+static void freeze(uint32_t out[5], const uint32_t in1[5]) { /* Derive five output limbs from in1 in two passes: a borrow-chained subtraction of the constant limbs 0x3fffffb (limb 0) and 0x3ffffff (limbs 1..4), then a carry-chained add-back of the same constants masked by x24. Presumably yields the fully reduced representative -- confirm against the generator. */
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffb); /* NOTE(review): the two-variable declaration and "Op (Syntax....) (...)" are unrendered printer output, not valid C; the same applies to every Op line below */
+ { uint32_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x11, Return x4, 0x3ffffff); /* the borrow from the previous limb (x11) is the first argument */
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x14, Return x6, 0x3ffffff);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x8, 0x3ffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x7, 0x3ffffff);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); /* presumably 0x0 or 0xffffffff depending on the final borrow x23; cmovznz is defined outside this file -- TODO confirm which way it selects */
+ { uint32_t x25 = (x24 & 0x3fffffb); /* constant limb 0, gated by the mask x24 */
+ { uint32_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint32_t x29 = (x24 & 0x3ffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint32_t x33 = (x24 & 0x3ffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint32_t x37 = (x24 & 0x3ffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint32_t x41 = (x24 & 0x3ffffff);
+ { uint32_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x22, Return x41); /* the final carry is bound to _ and never read */
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e137m13/femul.c b/src/Specific/solinas32_2e137m13/femul.c
index 67bc63d33..f2a69d86a 100644
--- a/src/Specific/solinas32_2e137m13/femul.c
+++ b/src/Specific/solinas32_2e137m13/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19))))))));
-{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0xd * ((uint64_t)x16 * x30)));
-{ uint64_t x34 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((0x2 * ((uint64_t)x13 * x21)) + ((uint64_t)x15 * x19)))))) + (0xd * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
-{ uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0xd * (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))));
-{ uint64_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + (0xd * (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + (0xd * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
-{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0xd * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
-{ uint64_t x39 = (((uint64_t)x5 * x19) + (0xd * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
-{ uint32_t x40 = (uint32_t) (x39 >> 0x12);
-{ uint32_t x41 = ((uint32_t)x39 & 0x3ffff);
-{ uint64_t x42 = (x40 + x38);
-{ uint32_t x43 = (uint32_t) (x42 >> 0x11);
-{ uint32_t x44 = ((uint32_t)x42 & 0x1ffff);
-{ uint64_t x45 = (x43 + x37);
-{ uint32_t x46 = (uint32_t) (x45 >> 0x11);
-{ uint32_t x47 = ((uint32_t)x45 & 0x1ffff);
-{ uint64_t x48 = (x46 + x36);
-{ uint32_t x49 = (uint32_t) (x48 >> 0x11);
-{ uint32_t x50 = ((uint32_t)x48 & 0x1ffff);
-{ uint64_t x51 = (x49 + x35);
-{ uint32_t x52 = (uint32_t) (x51 >> 0x11);
-{ uint32_t x53 = ((uint32_t)x51 & 0x1ffff);
-{ uint64_t x54 = (x52 + x34);
-{ uint32_t x55 = (uint32_t) (x54 >> 0x11);
-{ uint32_t x56 = ((uint32_t)x54 & 0x1ffff);
-{ uint64_t x57 = (x55 + x33);
-{ uint32_t x58 = (uint32_t) (x57 >> 0x11);
-{ uint32_t x59 = ((uint32_t)x57 & 0x1ffff);
-{ uint64_t x60 = (x58 + x32);
-{ uint32_t x61 = (uint32_t) (x60 >> 0x11);
-{ uint32_t x62 = ((uint32_t)x60 & 0x1ffff);
-{ uint32_t x63 = (x41 + (0xd * x61));
-{ uint32_t x64 = (x63 >> 0x12);
-{ uint32_t x65 = (x63 & 0x3ffff);
-{ uint32_t x66 = (x64 + x44);
-{ uint32_t x67 = (x66 >> 0x11);
-{ uint32_t x68 = (x66 & 0x1ffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Multiply the eight-limb operands in1 and in2 into eight output limbs. Limb 0 is cut at 18 bits (shift 0x12, mask 0x3ffff) and limbs 1..7 at 17 bits (shift 0x11, mask 0x1ffff); overflow past the top limb re-enters limb 0 scaled by 0xd. Presumably arithmetic mod 2^137 - 13, judging by the path name -- confirm against the generator. */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19)))))))); /* column 7: pairs whose indices sum to 7; the 0x2 factors presumably compensate for the uneven 18/17-bit limb widths -- confirm */
+ { uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0xd * ((uint64_t)x16 * x30))); /* column 6, plus the index-sum-14 pair wrapped around with factor 0xd */
+ { uint64_t x34 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((0x2 * ((uint64_t)x13 * x21)) + ((uint64_t)x15 * x19)))))) + (0xd * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+ { uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0xd * (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))));
+ { uint64_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + (0xd * (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27))))));
+ { uint64_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + (0xd * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
+ { uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0xd * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+ { uint64_t x39 = (((uint64_t)x5 * x19) + (0xd * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21)))))))))); /* column 0: in1[0]*in2[0] plus the index-sum-8 pairs, each doubled and scaled by 0xd */
+ { uint32_t x40 = (uint32_t) (x39 >> 0x12); /* carry chain, low column first; NOTE(review): every carry is narrowed to uint32_t, which relies on input bounds that are not stated here */
+ { uint32_t x41 = ((uint32_t)x39 & 0x3ffff);
+ { uint64_t x42 = (x40 + x38);
+ { uint32_t x43 = (uint32_t) (x42 >> 0x11);
+ { uint32_t x44 = ((uint32_t)x42 & 0x1ffff);
+ { uint64_t x45 = (x43 + x37);
+ { uint32_t x46 = (uint32_t) (x45 >> 0x11);
+ { uint32_t x47 = ((uint32_t)x45 & 0x1ffff);
+ { uint64_t x48 = (x46 + x36);
+ { uint32_t x49 = (uint32_t) (x48 >> 0x11);
+ { uint32_t x50 = ((uint32_t)x48 & 0x1ffff);
+ { uint64_t x51 = (x49 + x35);
+ { uint32_t x52 = (uint32_t) (x51 >> 0x11);
+ { uint32_t x53 = ((uint32_t)x51 & 0x1ffff);
+ { uint64_t x54 = (x52 + x34);
+ { uint32_t x55 = (uint32_t) (x54 >> 0x11);
+ { uint32_t x56 = ((uint32_t)x54 & 0x1ffff);
+ { uint64_t x57 = (x55 + x33);
+ { uint32_t x58 = (uint32_t) (x57 >> 0x11);
+ { uint32_t x59 = ((uint32_t)x57 & 0x1ffff);
+ { uint64_t x60 = (x58 + x32);
+ { uint32_t x61 = (uint32_t) (x60 >> 0x11); /* carry out of the top column */
+ { uint32_t x62 = ((uint32_t)x60 & 0x1ffff);
+ { uint32_t x63 = (x41 + (0xd * x61)); /* top carry re-enters limb 0 scaled by 0xd; evaluated in 32-bit arithmetic, so it relies on x61 being small -- bounds not visible here */
+ { uint32_t x64 = (x63 >> 0x12);
+ { uint32_t x65 = (x63 & 0x3ffff);
+ { uint32_t x66 = (x64 + x44); /* second pass: ripple the new limb-0 carry into limb 1 */
+ { uint32_t x67 = (x66 >> 0x11);
+ { uint32_t x68 = (x66 & 0x1ffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47); /* limb 2 takes the last carry x67 without being masked again */
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e137m13/fesquare.c b/src/Specific/solinas32_2e137m13/fesquare.c
index 805e9790e..95472900d 100644
--- a/src/Specific/solinas32_2e137m13/fesquare.c
+++ b/src/Specific/solinas32_2e137m13/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2))))))));
-{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xd * ((uint64_t)x13 * x13)));
-{ uint64_t x17 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xd * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
-{ uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xd * (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))));
-{ uint64_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xd * (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xd * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
-{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xd * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
-{ uint64_t x22 = (((uint64_t)x2 * x2) + (0xd * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
-{ uint32_t x23 = (uint32_t) (x22 >> 0x12);
-{ uint32_t x24 = ((uint32_t)x22 & 0x3ffff);
-{ uint64_t x25 = (x23 + x21);
-{ uint32_t x26 = (uint32_t) (x25 >> 0x11);
-{ uint32_t x27 = ((uint32_t)x25 & 0x1ffff);
-{ uint64_t x28 = (x26 + x20);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x11);
-{ uint32_t x30 = ((uint32_t)x28 & 0x1ffff);
-{ uint64_t x31 = (x29 + x19);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x11);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1ffff);
-{ uint64_t x34 = (x32 + x18);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x11);
-{ uint32_t x36 = ((uint32_t)x34 & 0x1ffff);
-{ uint64_t x37 = (x35 + x17);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x11);
-{ uint32_t x39 = ((uint32_t)x37 & 0x1ffff);
-{ uint64_t x40 = (x38 + x16);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x11);
-{ uint32_t x42 = ((uint32_t)x40 & 0x1ffff);
-{ uint64_t x43 = (x41 + x15);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x11);
-{ uint32_t x45 = ((uint32_t)x43 & 0x1ffff);
-{ uint32_t x46 = (x24 + (0xd * x44));
-{ uint32_t x47 = (x46 >> 0x12);
-{ uint32_t x48 = (x46 & 0x3ffff);
-{ uint32_t x49 = (x47 + x27);
-{ uint32_t x50 = (x49 >> 0x11);
-{ uint32_t x51 = (x49 & 0x1ffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint32_t out[8], const uint32_t in1[8]) { /* Square the eight-limb operand in1 into eight output limbs. Limb 0 is cut at 18 bits (shift 0x12, mask 0x3ffff) and limbs 1..7 at 17 bits (shift 0x11, mask 0x1ffff); terms that wrap past the top limb are scaled by 0xd. Presumably arithmetic mod 2^137 - 13, judging by the path name -- confirm against the generator. */
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2)))))))); /* column 7; mirrored products are both spelled out, and the 0x2 factors presumably compensate for the uneven 18/17-bit limb widths -- confirm */
+ { uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xd * ((uint64_t)x13 * x13)));
+ { uint64_t x17 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xd * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+ { uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xd * (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))));
+ { uint64_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xd * (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10))))));
+ { uint64_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xd * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
+ { uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xd * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+ { uint64_t x22 = (((uint64_t)x2 * x2) + (0xd * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4)))))))))); /* column 0: in1[0] squared plus the index-sum-8 pairs, each doubled and scaled by 0xd */
+ { uint32_t x23 = (uint32_t) (x22 >> 0x12); /* carry chain, low column first; NOTE(review): every carry is narrowed to uint32_t, which relies on input bounds that are not stated here */
+ { uint32_t x24 = ((uint32_t)x22 & 0x3ffff);
+ { uint64_t x25 = (x23 + x21);
+ { uint32_t x26 = (uint32_t) (x25 >> 0x11);
+ { uint32_t x27 = ((uint32_t)x25 & 0x1ffff);
+ { uint64_t x28 = (x26 + x20);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x11);
+ { uint32_t x30 = ((uint32_t)x28 & 0x1ffff);
+ { uint64_t x31 = (x29 + x19);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x11);
+ { uint32_t x33 = ((uint32_t)x31 & 0x1ffff);
+ { uint64_t x34 = (x32 + x18);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x11);
+ { uint32_t x36 = ((uint32_t)x34 & 0x1ffff);
+ { uint64_t x37 = (x35 + x17);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x11);
+ { uint32_t x39 = ((uint32_t)x37 & 0x1ffff);
+ { uint64_t x40 = (x38 + x16);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x11);
+ { uint32_t x42 = ((uint32_t)x40 & 0x1ffff);
+ { uint64_t x43 = (x41 + x15);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x11); /* carry out of the top column */
+ { uint32_t x45 = ((uint32_t)x43 & 0x1ffff);
+ { uint32_t x46 = (x24 + (0xd * x44)); /* top carry re-enters limb 0 scaled by 0xd; evaluated in 32-bit arithmetic, so it relies on x44 being small -- bounds not visible here */
+ { uint32_t x47 = (x46 >> 0x12);
+ { uint32_t x48 = (x46 & 0x3ffff);
+ { uint32_t x49 = (x47 + x27); /* second pass: ripple the new limb-0 carry into limb 1 */
+ { uint32_t x50 = (x49 >> 0x11);
+ { uint32_t x51 = (x49 & 0x1ffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30); /* limb 2 takes the last carry x50 without being masked again */
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e137m13/freeze.c b/src/Specific/solinas32_2e137m13/freeze.c
index 041ae06fe..57df7987e 100644
--- a/src/Specific/solinas32_2e137m13/freeze.c
+++ b/src/Specific/solinas32_2e137m13/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 18 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fff3;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) { /* Derive eight output limbs from in1 in two passes: a borrow-chained subtraction of the constant limbs 0x3fff3 (limb 0, width 18) and 0x1ffff (limbs 1..7, width 17), then a carry-chained add-back of the same constants masked by x39. Presumably yields the fully reduced representative -- confirm against the generator. */
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fff3); /* NOTE(review): the two-variable declaration and "Op (Syntax....) (...)" are unrendered printer output, not valid C; the same applies to every Op line below */
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x4, 0x1ffff); /* the borrow from the previous limb (x17) is the first argument */
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x6, 0x1ffff);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0x1ffff);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0x1ffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0x1ffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0x1ffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0x1ffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff); /* presumably 0x0 or 0xffffffff depending on the final borrow x38; cmovznz is defined outside this file -- TODO confirm which way it selects */
+ { uint32_t x40 = (x39 & 0x3fff3); /* constant limb 0, gated by the mask x39 */
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x44 = (x39 & 0x1ffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint32_t x48 = (x39 & 0x1ffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint32_t x52 = (x39 & 0x1ffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint32_t x56 = (x39 & 0x1ffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint32_t x60 = (x39 & 0x1ffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint32_t x64 = (x39 & 0x1ffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint32_t x68 = (x39 & 0x1ffff);
+ { uint32_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x37, Return x68); /* the final carry is bound to _ and never read */
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e140m27/femul.c b/src/Specific/solinas32_2e140m27/femul.c
index 387a48ea0..f8c7f04fd 100644
--- a/src/Specific/solinas32_2e140m27/femul.c
+++ b/src/Specific/solinas32_2e140m27/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x1b * ((uint64_t)x12 * x22)));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0x1b * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x1b * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x1b * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x1b * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
-{ uint64_t x30 = (x29 >> 0x18);
-{ uint32_t x31 = ((uint32_t)x29 & 0xffffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint64_t x33 = (x32 >> 0x17);
-{ uint32_t x34 = ((uint32_t)x32 & 0x7fffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint64_t x36 = (x35 >> 0x17);
-{ uint32_t x37 = ((uint32_t)x35 & 0x7fffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint64_t x39 = (x38 >> 0x18);
-{ uint32_t x40 = ((uint32_t)x38 & 0xffffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint32_t x42 = (uint32_t) (x41 >> 0x17);
-{ uint32_t x43 = ((uint32_t)x41 & 0x7fffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint32_t x45 = (uint32_t) (x44 >> 0x17);
-{ uint32_t x46 = ((uint32_t)x44 & 0x7fffff);
-{ uint64_t x47 = (x31 + ((uint64_t)0x1b * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x18);
-{ uint32_t x49 = ((uint32_t)x47 & 0xffffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x17);
-{ uint32_t x52 = (x50 & 0x7fffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x1b * ((uint64_t)x12 * x22)));
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0x1b * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x1b * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x1b * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + (0x1b * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
+ { uint64_t x30 = (x29 >> 0x18);
+ { uint32_t x31 = ((uint32_t)x29 & 0xffffff);
+ { uint64_t x32 = (x30 + x28);
+ { uint64_t x33 = (x32 >> 0x17);
+ { uint32_t x34 = ((uint32_t)x32 & 0x7fffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint64_t x36 = (x35 >> 0x17);
+ { uint32_t x37 = ((uint32_t)x35 & 0x7fffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint64_t x39 = (x38 >> 0x18);
+ { uint32_t x40 = ((uint32_t)x38 & 0xffffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint32_t x42 = (uint32_t) (x41 >> 0x17);
+ { uint32_t x43 = ((uint32_t)x41 & 0x7fffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint32_t x45 = (uint32_t) (x44 >> 0x17);
+ { uint32_t x46 = ((uint32_t)x44 & 0x7fffff);
+ { uint64_t x47 = (x31 + ((uint64_t)0x1b * x45));
+ { uint32_t x48 = (uint32_t) (x47 >> 0x18);
+ { uint32_t x49 = ((uint32_t)x47 & 0xffffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x17);
+ { uint32_t x52 = (x50 & 0x7fffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e140m27/fesquare.c b/src/Specific/solinas32_2e140m27/fesquare.c
index 07733e6c3..a7149e90e 100644
--- a/src/Specific/solinas32_2e140m27/fesquare.c
+++ b/src/Specific/solinas32_2e140m27/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x1b * ((uint64_t)x9 * x9)));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1b * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1b * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1b * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x1b * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
-{ uint64_t x17 = (x16 >> 0x18);
-{ uint32_t x18 = ((uint32_t)x16 & 0xffffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint64_t x20 = (x19 >> 0x17);
-{ uint32_t x21 = ((uint32_t)x19 & 0x7fffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint64_t x23 = (x22 >> 0x17);
-{ uint32_t x24 = ((uint32_t)x22 & 0x7fffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint64_t x26 = (x25 >> 0x18);
-{ uint32_t x27 = ((uint32_t)x25 & 0xffffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x17);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x17);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
-{ uint64_t x34 = (x18 + ((uint64_t)0x1b * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x18);
-{ uint32_t x36 = ((uint32_t)x34 & 0xffffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x17);
-{ uint32_t x39 = (x37 & 0x7fffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x1b * ((uint64_t)x9 * x9)));
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1b * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1b * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1b * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + (0x1b * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
+ { uint64_t x17 = (x16 >> 0x18);
+ { uint32_t x18 = ((uint32_t)x16 & 0xffffff);
+ { uint64_t x19 = (x17 + x15);
+ { uint64_t x20 = (x19 >> 0x17);
+ { uint32_t x21 = ((uint32_t)x19 & 0x7fffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint64_t x23 = (x22 >> 0x17);
+ { uint32_t x24 = ((uint32_t)x22 & 0x7fffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint64_t x26 = (x25 >> 0x18);
+ { uint32_t x27 = ((uint32_t)x25 & 0xffffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x17);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x17);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
+ { uint64_t x34 = (x18 + ((uint64_t)0x1b * x32));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x18);
+ { uint32_t x36 = ((uint32_t)x34 & 0xffffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x17);
+ { uint32_t x39 = (x37 & 0x7fffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e140m27/freeze.c b/src/Specific/solinas32_2e140m27/freeze.c
index 6d3117413..e4c539eb8 100644
--- a/src/Specific/solinas32_2e140m27/freeze.c
+++ b/src/Specific/solinas32_2e140m27/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffe5;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffe5);
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x7fffff);
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x7fffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0xffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x7fffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x7fffff);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xffffe5);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint32_t x34 = (x29 & 0x7fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x7fffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0xffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x7fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x7fffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e141m9/femul.c b/src/Specific/solinas32_2e141m9/femul.c
index edbb8a1e1..bfc1b2f93 100644
--- a/src/Specific/solinas32_2e141m9/femul.c
+++ b/src/Specific/solinas32_2e141m9/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + (((uint64_t)x9 * x19) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + (0x9 * (0x2 * ((uint64_t)x12 * x22))));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x9 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x9 * ((0x2 * ((uint64_t)x11 * x22)) + (((uint64_t)x13 * x23) + (0x2 * ((uint64_t)x12 * x21))))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x9 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x9 * ((0x2 * ((uint64_t)x7 * x22)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + (((uint64_t)x13 * x19) + (0x2 * ((uint64_t)x12 * x17))))))));
-{ uint64_t x30 = (x29 >> 0x18);
-{ uint32_t x31 = ((uint32_t)x29 & 0xffffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint64_t x33 = (x32 >> 0x17);
-{ uint32_t x34 = ((uint32_t)x32 & 0x7fffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint32_t x36 = (uint32_t) (x35 >> 0x18);
-{ uint32_t x37 = ((uint32_t)x35 & 0xffffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint32_t x39 = (uint32_t) (x38 >> 0x17);
-{ uint32_t x40 = ((uint32_t)x38 & 0x7fffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint32_t x42 = (uint32_t) (x41 >> 0x18);
-{ uint32_t x43 = ((uint32_t)x41 & 0xffffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint32_t x45 = (uint32_t) (x44 >> 0x17);
-{ uint32_t x46 = ((uint32_t)x44 & 0x7fffff);
-{ uint64_t x47 = (x31 + ((uint64_t)0x9 * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x18);
-{ uint32_t x49 = ((uint32_t)x47 & 0xffffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x17);
-{ uint32_t x52 = (x50 & 0x7fffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + (((uint64_t)x9 * x19) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + (0x9 * (0x2 * ((uint64_t)x12 * x22))));
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x9 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x9 * ((0x2 * ((uint64_t)x11 * x22)) + (((uint64_t)x13 * x23) + (0x2 * ((uint64_t)x12 * x21))))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x9 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + (0x9 * ((0x2 * ((uint64_t)x7 * x22)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + (((uint64_t)x13 * x19) + (0x2 * ((uint64_t)x12 * x17))))))));
+ { uint64_t x30 = (x29 >> 0x18);
+ { uint32_t x31 = ((uint32_t)x29 & 0xffffff);
+ { uint64_t x32 = (x30 + x28);
+ { uint64_t x33 = (x32 >> 0x17);
+ { uint32_t x34 = ((uint32_t)x32 & 0x7fffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint32_t x36 = (uint32_t) (x35 >> 0x18);
+ { uint32_t x37 = ((uint32_t)x35 & 0xffffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint32_t x39 = (uint32_t) (x38 >> 0x17);
+ { uint32_t x40 = ((uint32_t)x38 & 0x7fffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint32_t x42 = (uint32_t) (x41 >> 0x18);
+ { uint32_t x43 = ((uint32_t)x41 & 0xffffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint32_t x45 = (uint32_t) (x44 >> 0x17);
+ { uint32_t x46 = ((uint32_t)x44 & 0x7fffff);
+ { uint64_t x47 = (x31 + ((uint64_t)0x9 * x45));
+ { uint32_t x48 = (uint32_t) (x47 >> 0x18);
+ { uint32_t x49 = ((uint32_t)x47 & 0xffffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x17);
+ { uint32_t x52 = (x50 & 0x7fffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e141m9/fesquare.c b/src/Specific/solinas32_2e141m9/fesquare.c
index 1b26664f6..a0f82da0d 100644
--- a/src/Specific/solinas32_2e141m9/fesquare.c
+++ b/src/Specific/solinas32_2e141m9/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (0x2 * ((uint64_t)x9 * x9))));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * ((0x2 * ((uint64_t)x8 * x9)) + (((uint64_t)x10 * x10) + (0x2 * ((uint64_t)x9 * x8))))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x9)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + (0x2 * ((uint64_t)x9 * x4))))))));
-{ uint64_t x17 = (x16 >> 0x18);
-{ uint32_t x18 = ((uint32_t)x16 & 0xffffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint64_t x20 = (x19 >> 0x17);
-{ uint32_t x21 = ((uint32_t)x19 & 0x7fffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint32_t x23 = (uint32_t) (x22 >> 0x18);
-{ uint32_t x24 = ((uint32_t)x22 & 0xffffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint32_t x26 = (uint32_t) (x25 >> 0x17);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7fffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x18);
-{ uint32_t x30 = ((uint32_t)x28 & 0xffffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x17);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
-{ uint64_t x34 = (x18 + ((uint64_t)0x9 * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x18);
-{ uint32_t x36 = ((uint32_t)x34 & 0xffffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x17);
-{ uint32_t x39 = (x37 & 0x7fffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (0x2 * ((uint64_t)x9 * x9))));
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * ((0x2 * ((uint64_t)x8 * x9)) + (((uint64_t)x10 * x10) + (0x2 * ((uint64_t)x9 * x8))))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x9)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + (0x2 * ((uint64_t)x9 * x4))))))));
+ { uint64_t x17 = (x16 >> 0x18);
+ { uint32_t x18 = ((uint32_t)x16 & 0xffffff);
+ { uint64_t x19 = (x17 + x15);
+ { uint64_t x20 = (x19 >> 0x17);
+ { uint32_t x21 = ((uint32_t)x19 & 0x7fffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint32_t x23 = (uint32_t) (x22 >> 0x18);
+ { uint32_t x24 = ((uint32_t)x22 & 0xffffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint32_t x26 = (uint32_t) (x25 >> 0x17);
+ { uint32_t x27 = ((uint32_t)x25 & 0x7fffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x18);
+ { uint32_t x30 = ((uint32_t)x28 & 0xffffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x17);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
+ { uint64_t x34 = (x18 + ((uint64_t)0x9 * x32));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x18);
+ { uint32_t x36 = ((uint32_t)x34 & 0xffffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x17);
+ { uint32_t x39 = (x37 & 0x7fffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e141m9/freeze.c b/src/Specific/solinas32_2e141m9/freeze.c
index cc7f82bb3..847198cb2 100644
--- a/src/Specific/solinas32_2e141m9/freeze.c
+++ b/src/Specific/solinas32_2e141m9/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffff7;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xfffff7);
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x7fffff);
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0xffffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x7fffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0xffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x7fffff);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0xfffff7);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint32_t x34 = (x29 & 0x7fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0xffffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x7fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0xffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x7fffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e150m3/femul.c b/src/Specific/solinas32_2e150m3/femul.c
index 6b0d36b25..a7a59c7b0 100644
--- a/src/Specific/solinas32_2e150m3/femul.c
+++ b/src/Specific/solinas32_2e150m3/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ ℤ x20 = (((uint64_t)x5 * x18) +ℤ (((uint64_t)x7 * x19) +ℤ (((uint64_t)x9 * x17) +ℤ (((uint64_t)x11 * x15) +ℤ ((uint64_t)x10 * x13)))));
-{ ℤ x21 = ((((uint64_t)x5 * x19) +ℤ (((uint64_t)x7 * x17) +ℤ (((uint64_t)x9 * x15) +ℤ ((uint64_t)x11 * x13)))) +ℤ (0x3 *ℤ ((uint64_t)x10 * x18)));
-{ ℤ x22 = ((((uint64_t)x5 * x17) +ℤ (((uint64_t)x7 * x15) +ℤ ((uint64_t)x9 * x13))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x18) +ℤ ((uint64_t)x10 * x19))));
-{ ℤ x23 = ((((uint64_t)x5 * x15) +ℤ ((uint64_t)x7 * x13)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x18) +ℤ (((uint64_t)x11 * x19) +ℤ ((uint64_t)x10 * x17)))));
-{ ℤ x24 = (((uint64_t)x5 * x13) +ℤ (0x3 *ℤ (((uint64_t)x7 * x18) +ℤ (((uint64_t)x9 * x19) +ℤ (((uint64_t)x11 * x17) +ℤ ((uint64_t)x10 * x15))))));
-{ uint64_t x25 = (x24 >> 0x1e);
-{ uint32_t x26 = (x24 & 0x3fffffff);
-{ ℤ x27 = (x25 +ℤ x23);
-{ uint64_t x28 = (x27 >> 0x1e);
-{ uint32_t x29 = (x27 & 0x3fffffff);
-{ ℤ x30 = (x28 +ℤ x22);
-{ uint64_t x31 = (x30 >> 0x1e);
-{ uint32_t x32 = (x30 & 0x3fffffff);
-{ ℤ x33 = (x31 +ℤ x21);
-{ uint64_t x34 = (x33 >> 0x1e);
-{ uint32_t x35 = (x33 & 0x3fffffff);
-{ ℤ x36 = (x34 +ℤ x20);
-{ uint64_t x37 = (x36 >> 0x1e);
-{ uint32_t x38 = (x36 & 0x3fffffff);
-{ uint64_t x39 = (x26 + (0x3 * x37));
-{ uint32_t x40 = (uint32_t) (x39 >> 0x1e);
-{ uint32_t x41 = ((uint32_t)x39 & 0x3fffffff);
-{ uint32_t x42 = (x40 + x29);
-{ uint32_t x43 = (x42 >> 0x1e);
-{ uint32_t x44 = (x42 & 0x3fffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint32_t out[5], const uint32_t in1[5], const uint32_t in2[5]) {
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x18 = in2[4];
+ { const uint32_t x19 = in2[3];
+ { const uint32_t x17 = in2[2];
+ { const uint32_t x15 = in2[1];
+ { const uint32_t x13 = in2[0];
+ { ℤ x20 = (((uint64_t)x5 * x18) +ℤ (((uint64_t)x7 * x19) +ℤ (((uint64_t)x9 * x17) +ℤ (((uint64_t)x11 * x15) +ℤ ((uint64_t)x10 * x13)))));
+ { ℤ x21 = ((((uint64_t)x5 * x19) +ℤ (((uint64_t)x7 * x17) +ℤ (((uint64_t)x9 * x15) +ℤ ((uint64_t)x11 * x13)))) +ℤ (0x3 *ℤ ((uint64_t)x10 * x18)));
+ { ℤ x22 = ((((uint64_t)x5 * x17) +ℤ (((uint64_t)x7 * x15) +ℤ ((uint64_t)x9 * x13))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x18) +ℤ ((uint64_t)x10 * x19))));
+ { ℤ x23 = ((((uint64_t)x5 * x15) +ℤ ((uint64_t)x7 * x13)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x18) +ℤ (((uint64_t)x11 * x19) +ℤ ((uint64_t)x10 * x17)))));
+ { ℤ x24 = (((uint64_t)x5 * x13) +ℤ (0x3 *ℤ (((uint64_t)x7 * x18) +ℤ (((uint64_t)x9 * x19) +ℤ (((uint64_t)x11 * x17) +ℤ ((uint64_t)x10 * x15))))));
+ { uint64_t x25 = (x24 >> 0x1e);
+ { uint32_t x26 = (x24 & 0x3fffffff);
+ { ℤ x27 = (x25 +ℤ x23);
+ { uint64_t x28 = (x27 >> 0x1e);
+ { uint32_t x29 = (x27 & 0x3fffffff);
+ { ℤ x30 = (x28 +ℤ x22);
+ { uint64_t x31 = (x30 >> 0x1e);
+ { uint32_t x32 = (x30 & 0x3fffffff);
+ { ℤ x33 = (x31 +ℤ x21);
+ { uint64_t x34 = (x33 >> 0x1e);
+ { uint32_t x35 = (x33 & 0x3fffffff);
+ { ℤ x36 = (x34 +ℤ x20);
+ { uint64_t x37 = (x36 >> 0x1e);
+ { uint32_t x38 = (x36 & 0x3fffffff);
+ { uint64_t x39 = (x26 + (0x3 * x37));
+ { uint32_t x40 = (uint32_t) (x39 >> 0x1e);
+ { uint32_t x41 = ((uint32_t)x39 & 0x3fffffff);
+ { uint32_t x42 = (x40 + x29);
+ { uint32_t x43 = (x42 >> 0x1e);
+ { uint32_t x44 = (x42 & 0x3fffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e150m3/fesquare.c b/src/Specific/solinas32_2e150m3/fesquare.c
index e328fb2f2..239038ceb 100644
--- a/src/Specific/solinas32_2e150m3/fesquare.c
+++ b/src/Specific/solinas32_2e150m3/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ ℤ x9 = (((uint64_t)x2 * x7) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x7 * x2)))));
-{ ℤ x10 = ((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ ((uint64_t)x7 * x7)));
-{ ℤ x11 = ((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x7) +ℤ ((uint64_t)x7 * x8))));
-{ ℤ x12 = ((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x7) +ℤ (((uint64_t)x8 * x8) +ℤ ((uint64_t)x7 * x6)))));
-{ ℤ x13 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x7) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ ((uint64_t)x7 * x4))))));
-{ uint64_t x14 = (x13 >> 0x1e);
-{ uint32_t x15 = (x13 & 0x3fffffff);
-{ ℤ x16 = (x14 +ℤ x12);
-{ uint64_t x17 = (x16 >> 0x1e);
-{ uint32_t x18 = (x16 & 0x3fffffff);
-{ ℤ x19 = (x17 +ℤ x11);
-{ uint64_t x20 = (x19 >> 0x1e);
-{ uint32_t x21 = (x19 & 0x3fffffff);
-{ ℤ x22 = (x20 +ℤ x10);
-{ uint64_t x23 = (x22 >> 0x1e);
-{ uint32_t x24 = (x22 & 0x3fffffff);
-{ ℤ x25 = (x23 +ℤ x9);
-{ uint64_t x26 = (x25 >> 0x1e);
-{ uint32_t x27 = (x25 & 0x3fffffff);
-{ uint64_t x28 = (x15 + (0x3 * x26));
-{ uint32_t x29 = (uint32_t) (x28 >> 0x1e);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3fffffff);
-{ uint32_t x31 = (x29 + x18);
-{ uint32_t x32 = (x31 >> 0x1e);
-{ uint32_t x33 = (x31 & 0x3fffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { ℤ x9 = (((uint64_t)x2 * x7) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x7 * x2)))));
+ { ℤ x10 = ((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ ((uint64_t)x7 * x7)));
+ { ℤ x11 = ((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x7) +ℤ ((uint64_t)x7 * x8))));
+ { ℤ x12 = ((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x7) +ℤ (((uint64_t)x8 * x8) +ℤ ((uint64_t)x7 * x6)))));
+ { ℤ x13 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x7) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ ((uint64_t)x7 * x4))))));
+ { uint64_t x14 = (x13 >> 0x1e);
+ { uint32_t x15 = (x13 & 0x3fffffff);
+ { ℤ x16 = (x14 +ℤ x12);
+ { uint64_t x17 = (x16 >> 0x1e);
+ { uint32_t x18 = (x16 & 0x3fffffff);
+ { ℤ x19 = (x17 +ℤ x11);
+ { uint64_t x20 = (x19 >> 0x1e);
+ { uint32_t x21 = (x19 & 0x3fffffff);
+ { ℤ x22 = (x20 +ℤ x10);
+ { uint64_t x23 = (x22 >> 0x1e);
+ { uint32_t x24 = (x22 & 0x3fffffff);
+ { ℤ x25 = (x23 +ℤ x9);
+ { uint64_t x26 = (x25 >> 0x1e);
+ { uint32_t x27 = (x25 & 0x3fffffff);
+ { uint64_t x28 = (x15 + (0x3 * x26));
+ { uint32_t x29 = (uint32_t) (x28 >> 0x1e);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3fffffff);
+ { uint32_t x31 = (x29 + x18);
+ { uint32_t x32 = (x31 >> 0x1e);
+ { uint32_t x33 = (x31 & 0x3fffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e150m3/freeze.c b/src/Specific/solinas32_2e150m3/freeze.c
index 624c3b4d9..893c8987f 100644
--- a/src/Specific/solinas32_2e150m3/freeze.c
+++ b/src/Specific/solinas32_2e150m3/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 30 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffd;;
+static void freeze(uint32_t out[5], const uint32_t in1[5]) {
+ { const uint32_t x7 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffd);
+ { uint32_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x11, Return x4, 0x3fffffff);
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x14, Return x6, 0x3fffffff);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x8, 0x3fffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x7, 0x3fffffff);
+ { uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);
+ { uint32_t x25 = (x24 & 0x3ffffffd);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint32_t x29 = (x24 & 0x3fffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint32_t x33 = (x24 & 0x3fffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint32_t x37 = (x24 & 0x3fffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint32_t x41 = (x24 & 0x3fffffff);
+ { uint32_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x22, Return x41);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e150m5/femul.c b/src/Specific/solinas32_2e150m5/femul.c
index 2100f2db0..7c3a9970d 100644
--- a/src/Specific/solinas32_2e150m5/femul.c
+++ b/src/Specific/solinas32_2e150m5/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x5 * ((uint64_t)x12 * x22)));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x5 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + ((uint64_t)x9 * x15))) + (0x5 * (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21)))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x5 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x5 * (((uint64_t)x7 * x22) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x12 * x17)))))));
-{ uint64_t x30 = (x29 >> 0x19);
-{ uint32_t x31 = ((uint32_t)x29 & 0x1ffffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint64_t x33 = (x32 >> 0x19);
-{ uint32_t x34 = ((uint32_t)x32 & 0x1ffffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint64_t x36 = (x35 >> 0x19);
-{ uint32_t x37 = ((uint32_t)x35 & 0x1ffffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint64_t x39 = (x38 >> 0x19);
-{ uint32_t x40 = ((uint32_t)x38 & 0x1ffffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint32_t x42 = (uint32_t) (x41 >> 0x19);
-{ uint32_t x43 = ((uint32_t)x41 & 0x1ffffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint32_t x45 = (uint32_t) (x44 >> 0x19);
-{ uint32_t x46 = ((uint32_t)x44 & 0x1ffffff);
-{ uint64_t x47 = (x31 + ((uint64_t)0x5 * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x19);
-{ uint32_t x49 = ((uint32_t)x47 & 0x1ffffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x19);
-{ uint32_t x52 = (x50 & 0x1ffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x5 * ((uint64_t)x12 * x22)));
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x5 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + ((uint64_t)x9 * x15))) + (0x5 * (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21)))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x5 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + (0x5 * (((uint64_t)x7 * x22) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x12 * x17)))))));
+ { uint64_t x30 = (x29 >> 0x19);
+ { uint32_t x31 = ((uint32_t)x29 & 0x1ffffff);
+ { uint64_t x32 = (x30 + x28);
+ { uint64_t x33 = (x32 >> 0x19);
+ { uint32_t x34 = ((uint32_t)x32 & 0x1ffffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint64_t x36 = (x35 >> 0x19);
+ { uint32_t x37 = ((uint32_t)x35 & 0x1ffffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint64_t x39 = (x38 >> 0x19);
+ { uint32_t x40 = ((uint32_t)x38 & 0x1ffffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint32_t x42 = (uint32_t) (x41 >> 0x19);
+ { uint32_t x43 = ((uint32_t)x41 & 0x1ffffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint32_t x45 = (uint32_t) (x44 >> 0x19);
+ { uint32_t x46 = ((uint32_t)x44 & 0x1ffffff);
+ { uint64_t x47 = (x31 + ((uint64_t)0x5 * x45));
+ { uint32_t x48 = (uint32_t) (x47 >> 0x19);
+ { uint32_t x49 = ((uint32_t)x47 & 0x1ffffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x19);
+ { uint32_t x52 = (x50 & 0x1ffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e150m5/fesquare.c b/src/Specific/solinas32_2e150m5/fesquare.c
index f45942450..7210a32c4 100644
--- a/src/Specific/solinas32_2e150m5/fesquare.c
+++ b/src/Specific/solinas32_2e150m5/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x5 * ((uint64_t)x9 * x9)));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8)))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x5 * (((uint64_t)x4 * x9) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((uint64_t)x9 * x4)))))));
-{ uint64_t x17 = (x16 >> 0x19);
-{ uint32_t x18 = ((uint32_t)x16 & 0x1ffffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint64_t x20 = (x19 >> 0x19);
-{ uint32_t x21 = ((uint32_t)x19 & 0x1ffffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint64_t x23 = (x22 >> 0x19);
-{ uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint64_t x26 = (x25 >> 0x19);
-{ uint32_t x27 = ((uint32_t)x25 & 0x1ffffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x19);
-{ uint32_t x30 = ((uint32_t)x28 & 0x1ffffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x19);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
-{ uint64_t x34 = (x18 + ((uint64_t)0x5 * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x19);
-{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x19);
-{ uint32_t x39 = (x37 & 0x1ffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x5 * ((uint64_t)x9 * x9)));
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8)))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + (0x5 * (((uint64_t)x4 * x9) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((uint64_t)x9 * x4)))))));
+ { uint64_t x17 = (x16 >> 0x19);
+ { uint32_t x18 = ((uint32_t)x16 & 0x1ffffff);
+ { uint64_t x19 = (x17 + x15);
+ { uint64_t x20 = (x19 >> 0x19);
+ { uint32_t x21 = ((uint32_t)x19 & 0x1ffffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint64_t x23 = (x22 >> 0x19);
+ { uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint64_t x26 = (x25 >> 0x19);
+ { uint32_t x27 = ((uint32_t)x25 & 0x1ffffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x19);
+ { uint32_t x30 = ((uint32_t)x28 & 0x1ffffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x19);
+ { uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+ { uint64_t x34 = (x18 + ((uint64_t)0x5 * x32));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x19);
+ { uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x19);
+ { uint32_t x39 = (x37 & 0x1ffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e150m5/freeze.c b/src/Specific/solinas32_2e150m5/freeze.c
index 4dee1e6d1..190a62637 100644
--- a/src/Specific/solinas32_2e150m5/freeze.c
+++ b/src/Specific/solinas32_2e150m5/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 25 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffb;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffb);
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x1ffffff);
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x1ffffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x1ffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x1ffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x1ffffff);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0x1fffffb);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint32_t x34 = (x29 & 0x1ffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x1ffffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x1ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x1ffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x1ffffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e152m17/femul.c b/src/Specific/solinas32_2e152m17/femul.c
index 68895917e..5cf3743f7 100644
--- a/src/Specific/solinas32_2e152m17/femul.c
+++ b/src/Specific/solinas32_2e152m17/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x11 * ((uint64_t)x12 * x22)));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0x11 * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x11 * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x11 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x11 * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
-{ uint64_t x30 = (x29 >> 0x1a);
-{ uint32_t x31 = ((uint32_t)x29 & 0x3ffffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint64_t x33 = (x32 >> 0x19);
-{ uint32_t x34 = ((uint32_t)x32 & 0x1ffffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint64_t x36 = (x35 >> 0x19);
-{ uint32_t x37 = ((uint32_t)x35 & 0x1ffffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint64_t x39 = (x38 >> 0x1a);
-{ uint32_t x40 = ((uint32_t)x38 & 0x3ffffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint64_t x42 = (x41 >> 0x19);
-{ uint32_t x43 = ((uint32_t)x41 & 0x1ffffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint64_t x45 = (x44 >> 0x19);
-{ uint32_t x46 = ((uint32_t)x44 & 0x1ffffff);
-{ uint64_t x47 = (x31 + (0x11 * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x1a);
-{ uint32_t x49 = ((uint32_t)x47 & 0x3ffffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x19);
-{ uint32_t x52 = (x50 & 0x1ffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x11 * ((uint64_t)x12 * x22)));
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0x11 * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x11 * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x11 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + (0x11 * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
+ { uint64_t x30 = (x29 >> 0x1a);
+ { uint32_t x31 = ((uint32_t)x29 & 0x3ffffff);
+ { uint64_t x32 = (x30 + x28);
+ { uint64_t x33 = (x32 >> 0x19);
+ { uint32_t x34 = ((uint32_t)x32 & 0x1ffffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint64_t x36 = (x35 >> 0x19);
+ { uint32_t x37 = ((uint32_t)x35 & 0x1ffffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint64_t x39 = (x38 >> 0x1a);
+ { uint32_t x40 = ((uint32_t)x38 & 0x3ffffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint64_t x42 = (x41 >> 0x19);
+ { uint32_t x43 = ((uint32_t)x41 & 0x1ffffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint64_t x45 = (x44 >> 0x19);
+ { uint32_t x46 = ((uint32_t)x44 & 0x1ffffff);
+ { uint64_t x47 = (x31 + (0x11 * x45));
+ { uint32_t x48 = (uint32_t) (x47 >> 0x1a);
+ { uint32_t x49 = ((uint32_t)x47 & 0x3ffffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x19);
+ { uint32_t x52 = (x50 & 0x1ffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e152m17/fesquare.c b/src/Specific/solinas32_2e152m17/fesquare.c
index 9f5e71fda..3d2387de8 100644
--- a/src/Specific/solinas32_2e152m17/fesquare.c
+++ b/src/Specific/solinas32_2e152m17/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * ((uint64_t)x9 * x9)));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x11 * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x11 * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
-{ uint64_t x17 = (x16 >> 0x1a);
-{ uint32_t x18 = ((uint32_t)x16 & 0x3ffffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint64_t x20 = (x19 >> 0x19);
-{ uint32_t x21 = ((uint32_t)x19 & 0x1ffffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint64_t x23 = (x22 >> 0x19);
-{ uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint64_t x26 = (x25 >> 0x1a);
-{ uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint64_t x29 = (x28 >> 0x19);
-{ uint32_t x30 = ((uint32_t)x28 & 0x1ffffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint64_t x32 = (x31 >> 0x19);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
-{ uint64_t x34 = (x18 + (0x11 * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x1a);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x19);
-{ uint32_t x39 = (x37 & 0x1ffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) { /* fesquare: combines pairwise products of the six 32-bit limbs of in1 into six limbs written to out. Presumably squaring modulo 2^152 - 17 (directory name solinas32_2e152m17; 0x11 == 17) - confirm against the generator. */
+ { const uint32_t x9 = in1[5]; /* limbs are loaded from the highest index (in1[5]) down to in1[0] */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2)))))); /* x11..x16 are 64-bit sums of limb products; x11 ends up in out[5] and x16 in out[0]. Each operand is widened to uint64_t before multiplying. */
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * ((uint64_t)x9 * x9))); /* the 0x11-scaled group holds the products that are folded back into this position (presumably the wrap-around of the reduction - TODO confirm) */
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x11 * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10))))); /* 0x2 factors: presumably compensate for the uneven 26/25-bit limb widths - TODO confirm */
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + (0x11 * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
+ { uint64_t x17 = (x16 >> 0x1a); /* carry chain starts at the lowest accumulator: bits above the low 26 (0x1a) become the carry */
+ { uint32_t x18 = ((uint32_t)x16 & 0x3ffffff); /* low 26 bits stay in this limb */
+ { uint64_t x19 = (x17 + x15); /* carry is added to the next accumulator */
+ { uint64_t x20 = (x19 >> 0x19); /* this limb keeps 25 (0x19) bits */
+ { uint32_t x21 = ((uint32_t)x19 & 0x1ffffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint64_t x23 = (x22 >> 0x19);
+ { uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint64_t x26 = (x25 >> 0x1a);
+ { uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint64_t x29 = (x28 >> 0x19);
+ { uint32_t x30 = ((uint32_t)x28 & 0x1ffffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint64_t x32 = (x31 >> 0x19); /* carry out of the top accumulator; the six limb widths 26+25+25+26+25+25 total 152 bits */
+ { uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+ { uint64_t x34 = (x18 + (0x11 * x32)); /* the top carry is scaled by 0x11 and added back into the lowest limb */
+ { uint32_t x35 = (uint32_t) (x34 >> 0x1a); /* second, short carry pass over the two lowest limbs */
+ { uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x19);
+ { uint32_t x39 = (x37 & 0x1ffffff);
+ out[0] = x36; /* results are stored lowest limb first */
+ out[1] = x39;
+ out[2] = (x38 + x24); /* the last carry is added without a further mask */
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e152m17/freeze.c b/src/Specific/solinas32_2e152m17/freeze.c
index e26984208..23f92e53d 100644
--- a/src/Specific/solinas32_2e152m17/freeze.c
+++ b/src/Specific/solinas32_2e152m17/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffef;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) { /* freeze: borrow-chained subtraction of per-limb constants from the six limbs of in1, followed by a masked add-back; six limbs are written to out. Presumably yields the canonical representative modulo 2^152 - 17 (directory name solinas32_2e152m17) - confirm. NOTE(review): the Op (Syntax....) lines below are not C; they look like un-lowered generator output, so this body will not compile as written. */
+ { const uint32_t x9 = in1[5]; /* limbs are loaded from the highest index (in1[5]) down to in1[0] */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffef); /* lowest limb: borrow-in is 0x0, constant 0x3ffffef == 2^26 - 17; x13 is presumably the borrow-out */
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x1ffffff); /* each later step takes the previous step's second result (x13, x16, ...) as its first argument */
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x1ffffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x3ffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x1ffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x1ffffff); /* x28 is the final borrow of the chain */
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); /* x29 is either 0x0 or 0xffffffff depending on x28; presumably 0xffffffff when x28 is nonzero - cmovznz is defined elsewhere, verify */
+ { uint32_t x30 = (x29 & 0x3ffffef); /* the mask turns each limb constant into either itself or 0 */
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30); /* add-back chain mirrors the subtraction chain, starting with carry-in 0x0 */
+ { uint32_t x34 = (x29 & 0x1ffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x1ffffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x3ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x1ffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x1ffffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50); /* the second result of the last step is bound to _ and never used */
+ out[0] = x32; /* results are stored lowest limb first */
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e158m15/femul.c b/src/Specific/solinas32_2e158m15/femul.c
index 0af9ac6da..353afb2a8 100644
--- a/src/Specific/solinas32_2e158m15/femul.c
+++ b/src/Specific/solinas32_2e158m15/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
-{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0xf * ((uint64_t)x12 * x22)));
-{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0xf * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
-{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0xf * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
-{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0xf * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ uint64_t x29 = (((uint64_t)x5 * x15) + (0xf * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
-{ uint64_t x30 = (x29 >> 0x1b);
-{ uint32_t x31 = ((uint32_t)x29 & 0x7ffffff);
-{ uint64_t x32 = (x30 + x28);
-{ uint64_t x33 = (x32 >> 0x1a);
-{ uint32_t x34 = ((uint32_t)x32 & 0x3ffffff);
-{ uint64_t x35 = (x33 + x27);
-{ uint64_t x36 = (x35 >> 0x1a);
-{ uint32_t x37 = ((uint32_t)x35 & 0x3ffffff);
-{ uint64_t x38 = (x36 + x26);
-{ uint64_t x39 = (x38 >> 0x1b);
-{ uint32_t x40 = ((uint32_t)x38 & 0x7ffffff);
-{ uint64_t x41 = (x39 + x25);
-{ uint64_t x42 = (x41 >> 0x1a);
-{ uint32_t x43 = ((uint32_t)x41 & 0x3ffffff);
-{ uint64_t x44 = (x42 + x24);
-{ uint64_t x45 = (x44 >> 0x1a);
-{ uint32_t x46 = ((uint32_t)x44 & 0x3ffffff);
-{ uint64_t x47 = (x31 + (0xf * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x1b);
-{ uint32_t x49 = ((uint32_t)x47 & 0x7ffffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x1a);
-{ uint32_t x52 = (x50 & 0x3ffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) { /* femul: combines products of the six 32-bit limbs of in1 with the six limbs of in2 into six limbs written to out. Presumably multiplication modulo 2^158 - 15 (directory name solinas32_2e158m15; 0xf == 15) - confirm against the generator. */
+ { const uint32_t x12 = in1[5]; /* both inputs are loaded from the highest index down to index 0 */
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15)))))); /* x24..x29 are 64-bit sums of in1-limb times in2-limb products; x24 ends up in out[5] and x29 in out[0]. The in1 operand is widened to uint64_t before each multiply. */
+ { uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0xf * ((uint64_t)x12 * x22))); /* the 0xf-scaled group holds the products that are folded back into this position (presumably the wrap-around of the reduction - TODO confirm) */
+ { uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0xf * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23))))); /* 0x2 factors: presumably compensate for the uneven 27/26-bit limb widths - TODO confirm */
+ { uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0xf * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
+ { uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0xf * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { uint64_t x29 = (((uint64_t)x5 * x15) + (0xf * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
+ { uint64_t x30 = (x29 >> 0x1b); /* carry chain starts at the lowest accumulator: bits above the low 27 (0x1b) become the carry */
+ { uint32_t x31 = ((uint32_t)x29 & 0x7ffffff); /* low 27 bits stay in this limb */
+ { uint64_t x32 = (x30 + x28); /* carry is added to the next accumulator */
+ { uint64_t x33 = (x32 >> 0x1a); /* this limb keeps 26 (0x1a) bits */
+ { uint32_t x34 = ((uint32_t)x32 & 0x3ffffff);
+ { uint64_t x35 = (x33 + x27);
+ { uint64_t x36 = (x35 >> 0x1a);
+ { uint32_t x37 = ((uint32_t)x35 & 0x3ffffff);
+ { uint64_t x38 = (x36 + x26);
+ { uint64_t x39 = (x38 >> 0x1b);
+ { uint32_t x40 = ((uint32_t)x38 & 0x7ffffff);
+ { uint64_t x41 = (x39 + x25);
+ { uint64_t x42 = (x41 >> 0x1a);
+ { uint32_t x43 = ((uint32_t)x41 & 0x3ffffff);
+ { uint64_t x44 = (x42 + x24);
+ { uint64_t x45 = (x44 >> 0x1a); /* carry out of the top accumulator; the six limb widths 27+26+26+27+26+26 total 158 bits */
+ { uint32_t x46 = ((uint32_t)x44 & 0x3ffffff);
+ { uint64_t x47 = (x31 + (0xf * x45)); /* the top carry is scaled by 0xf and added back into the lowest limb */
+ { uint32_t x48 = (uint32_t) (x47 >> 0x1b); /* second, short carry pass over the two lowest limbs */
+ { uint32_t x49 = ((uint32_t)x47 & 0x7ffffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x1a);
+ { uint32_t x52 = (x50 & 0x3ffffff);
+ out[0] = x49; /* results are stored lowest limb first */
+ out[1] = x52;
+ out[2] = (x51 + x37); /* the last carry is added without a further mask */
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e158m15/fesquare.c b/src/Specific/solinas32_2e158m15/fesquare.c
index 81cdfb2c9..588f6154b 100644
--- a/src/Specific/solinas32_2e158m15/fesquare.c
+++ b/src/Specific/solinas32_2e158m15/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
-{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xf * ((uint64_t)x9 * x9)));
-{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xf * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
-{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
-{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ uint64_t x16 = (((uint64_t)x2 * x2) + (0xf * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
-{ uint64_t x17 = (x16 >> 0x1b);
-{ uint32_t x18 = ((uint32_t)x16 & 0x7ffffff);
-{ uint64_t x19 = (x17 + x15);
-{ uint64_t x20 = (x19 >> 0x1a);
-{ uint32_t x21 = ((uint32_t)x19 & 0x3ffffff);
-{ uint64_t x22 = (x20 + x14);
-{ uint64_t x23 = (x22 >> 0x1a);
-{ uint32_t x24 = ((uint32_t)x22 & 0x3ffffff);
-{ uint64_t x25 = (x23 + x13);
-{ uint64_t x26 = (x25 >> 0x1b);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
-{ uint64_t x28 = (x26 + x12);
-{ uint64_t x29 = (x28 >> 0x1a);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
-{ uint64_t x31 = (x29 + x11);
-{ uint64_t x32 = (x31 >> 0x1a);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
-{ uint64_t x34 = (x18 + (0xf * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x1b);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x1a);
-{ uint32_t x39 = (x37 & 0x3ffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) { /* fesquare: combines pairwise products of the six 32-bit limbs of in1 into six limbs written to out. Presumably squaring modulo 2^158 - 15 (directory name solinas32_2e158m15; 0xf == 15) - confirm against the generator. */
+ { const uint32_t x9 = in1[5]; /* limbs are loaded from the highest index (in1[5]) down to in1[0] */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2)))))); /* x11..x16 are 64-bit sums of limb products; x11 ends up in out[5] and x16 in out[0]. Each operand is widened to uint64_t before multiplying. */
+ { uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xf * ((uint64_t)x9 * x9))); /* the 0xf-scaled group holds the products that are folded back into this position (presumably the wrap-around of the reduction - TODO confirm) */
+ { uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xf * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10))))); /* 0x2 factors: presumably compensate for the uneven 27/26-bit limb widths - TODO confirm */
+ { uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
+ { uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { uint64_t x16 = (((uint64_t)x2 * x2) + (0xf * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
+ { uint64_t x17 = (x16 >> 0x1b); /* carry chain starts at the lowest accumulator: bits above the low 27 (0x1b) become the carry */
+ { uint32_t x18 = ((uint32_t)x16 & 0x7ffffff); /* low 27 bits stay in this limb */
+ { uint64_t x19 = (x17 + x15); /* carry is added to the next accumulator */
+ { uint64_t x20 = (x19 >> 0x1a); /* this limb keeps 26 (0x1a) bits */
+ { uint32_t x21 = ((uint32_t)x19 & 0x3ffffff);
+ { uint64_t x22 = (x20 + x14);
+ { uint64_t x23 = (x22 >> 0x1a);
+ { uint32_t x24 = ((uint32_t)x22 & 0x3ffffff);
+ { uint64_t x25 = (x23 + x13);
+ { uint64_t x26 = (x25 >> 0x1b);
+ { uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
+ { uint64_t x28 = (x26 + x12);
+ { uint64_t x29 = (x28 >> 0x1a);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+ { uint64_t x31 = (x29 + x11);
+ { uint64_t x32 = (x31 >> 0x1a); /* carry out of the top accumulator; the six limb widths 27+26+26+27+26+26 total 158 bits */
+ { uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
+ { uint64_t x34 = (x18 + (0xf * x32)); /* the top carry is scaled by 0xf and added back into the lowest limb */
+ { uint32_t x35 = (uint32_t) (x34 >> 0x1b); /* second, short carry pass over the two lowest limbs */
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x1a);
+ { uint32_t x39 = (x37 & 0x3ffffff);
+ out[0] = x36; /* results are stored lowest limb first */
+ out[1] = x39;
+ out[2] = (x38 + x24); /* the last carry is added without a further mask */
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e158m15/freeze.c b/src/Specific/solinas32_2e158m15/freeze.c
index e57392de5..e9883c30c 100644
--- a/src/Specific/solinas32_2e158m15/freeze.c
+++ b/src/Specific/solinas32_2e158m15/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffff1;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) { /* freeze: borrow-chained subtraction of per-limb constants from the six limbs of in1, followed by a masked add-back; six limbs are written to out. Presumably yields the canonical representative modulo 2^158 - 15 (directory name solinas32_2e158m15) - confirm. NOTE(review): the Op (Syntax....) lines below are not C; they look like un-lowered generator output, so this body will not compile as written. */
+ { const uint32_t x9 = in1[5]; /* limbs are loaded from the highest index (in1[5]) down to in1[0] */
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffff1); /* lowest limb: borrow-in is 0x0, constant 0x7fffff1 == 2^27 - 15; x13 is presumably the borrow-out */
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x3ffffff); /* each later step takes the previous step's second result (x13, x16, ...) as its first argument */
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x3ffffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x7ffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x3ffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x3ffffff); /* x28 is the final borrow of the chain */
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); /* x29 is either 0x0 or 0xffffffff depending on x28; presumably 0xffffffff when x28 is nonzero - cmovznz is defined elsewhere, verify */
+ { uint32_t x30 = (x29 & 0x7fffff1); /* the mask turns each limb constant into either itself or 0 */
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30); /* add-back chain mirrors the subtraction chain, starting with carry-in 0x0 */
+ { uint32_t x34 = (x29 & 0x3ffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x3ffffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x7ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x3ffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x3ffffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50); /* the second result of the last step is bound to _ and never used */
+ out[0] = x32; /* results are stored lowest limb first */
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e165m25/femul.c b/src/Specific/solinas32_2e165m25/femul.c
index 7f0aa5f38..3ac462ae3 100644
--- a/src/Specific/solinas32_2e165m25/femul.c
+++ b/src/Specific/solinas32_2e165m25/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x19 * ((uint64_t)x18 * x34)));
-{ uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0x19 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))));
-{ uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0x19 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))));
-{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x19 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0x19 * ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))));
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0x19 * (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))));
-{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x19 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
-{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x19 * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
-{ uint32_t x45 = (uint32_t) (x44 >> 0x13);
-{ uint32_t x46 = ((uint32_t)x44 & 0x7ffff);
-{ uint64_t x47 = (x45 + x43);
-{ uint32_t x48 = (uint32_t) (x47 >> 0x12);
-{ uint32_t x49 = ((uint32_t)x47 & 0x3ffff);
-{ uint64_t x50 = (x48 + x42);
-{ uint32_t x51 = (uint32_t) (x50 >> 0x12);
-{ uint32_t x52 = ((uint32_t)x50 & 0x3ffff);
-{ uint64_t x53 = (x51 + x41);
-{ uint32_t x54 = (uint32_t) (x53 >> 0x13);
-{ uint32_t x55 = ((uint32_t)x53 & 0x7ffff);
-{ uint64_t x56 = (x54 + x40);
-{ uint32_t x57 = (uint32_t) (x56 >> 0x12);
-{ uint32_t x58 = ((uint32_t)x56 & 0x3ffff);
-{ uint64_t x59 = (x57 + x39);
-{ uint32_t x60 = (uint32_t) (x59 >> 0x12);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3ffff);
-{ uint64_t x62 = (x60 + x38);
-{ uint32_t x63 = (uint32_t) (x62 >> 0x13);
-{ uint32_t x64 = ((uint32_t)x62 & 0x7ffff);
-{ uint64_t x65 = (x63 + x37);
-{ uint32_t x66 = (uint32_t) (x65 >> 0x12);
-{ uint32_t x67 = ((uint32_t)x65 & 0x3ffff);
-{ uint64_t x68 = (x66 + x36);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x12);
-{ uint32_t x70 = ((uint32_t)x68 & 0x3ffff);
-{ uint32_t x71 = (x46 + (0x19 * x69));
-{ uint32_t x72 = (x71 >> 0x13);
-{ uint32_t x73 = (x71 & 0x7ffff);
-{ uint32_t x74 = (x72 + x49);
-{ uint32_t x75 = (x74 >> 0x12);
-{ uint32_t x76 = (x74 & 0x3ffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) { /* femul: combines products of the nine 32-bit limbs of in1 with the nine limbs of in2 into nine limbs written to out. Presumably multiplication modulo 2^165 - 25 (directory name solinas32_2e165m25; 0x19 == 25) - confirm against the generator. */
+ { const uint32_t x18 = in1[8]; /* both inputs are loaded from the highest index down to index 0 */
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21))))))))); /* x36..x44 are 64-bit sums of in1-limb times in2-limb products; x36 ends up in out[8] and x44 in out[0]. The in1 operand is widened to uint64_t before each multiply. */
+ { uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x19 * ((uint64_t)x18 * x34))); /* the 0x19-scaled group holds the products that are folded back into this position (presumably the wrap-around of the reduction - TODO confirm) */
+ { uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0x19 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35))))); /* 0x2 factors: presumably compensate for the uneven 19/18-bit limb widths - TODO confirm */
+ { uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0x19 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))));
+ { uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x19 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0x19 * ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))));
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0x19 * (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))));
+ { uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x19 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+ { uint64_t x44 = (((uint64_t)x5 * x21) + (0x19 * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
+ { uint32_t x45 = (uint32_t) (x44 >> 0x13); /* carry chain starts at the lowest accumulator: bits above the low 19 (0x13) become the carry. The carry is narrowed to uint32_t here; assumes the shifted value fits in 32 bits - presumably ensured by input bounds, verify against the generator */
+ { uint32_t x46 = ((uint32_t)x44 & 0x7ffff); /* low 19 bits stay in this limb */
+ { uint64_t x47 = (x45 + x43); /* carry is added to the next accumulator */
+ { uint32_t x48 = (uint32_t) (x47 >> 0x12); /* this limb keeps 18 (0x12) bits */
+ { uint32_t x49 = ((uint32_t)x47 & 0x3ffff);
+ { uint64_t x50 = (x48 + x42);
+ { uint32_t x51 = (uint32_t) (x50 >> 0x12);
+ { uint32_t x52 = ((uint32_t)x50 & 0x3ffff);
+ { uint64_t x53 = (x51 + x41);
+ { uint32_t x54 = (uint32_t) (x53 >> 0x13);
+ { uint32_t x55 = ((uint32_t)x53 & 0x7ffff);
+ { uint64_t x56 = (x54 + x40);
+ { uint32_t x57 = (uint32_t) (x56 >> 0x12);
+ { uint32_t x58 = ((uint32_t)x56 & 0x3ffff);
+ { uint64_t x59 = (x57 + x39);
+ { uint32_t x60 = (uint32_t) (x59 >> 0x12);
+ { uint32_t x61 = ((uint32_t)x59 & 0x3ffff);
+ { uint64_t x62 = (x60 + x38);
+ { uint32_t x63 = (uint32_t) (x62 >> 0x13);
+ { uint32_t x64 = ((uint32_t)x62 & 0x7ffff);
+ { uint64_t x65 = (x63 + x37);
+ { uint32_t x66 = (uint32_t) (x65 >> 0x12);
+ { uint32_t x67 = ((uint32_t)x65 & 0x3ffff);
+ { uint64_t x68 = (x66 + x36);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x12); /* carry out of the top accumulator; the nine limb widths (19+18+18 repeated three times) total 165 bits */
+ { uint32_t x70 = ((uint32_t)x68 & 0x3ffff);
+ { uint32_t x71 = (x46 + (0x19 * x69)); /* the top carry is scaled by 0x19 and added back into the lowest limb, in 32-bit arithmetic */
+ { uint32_t x72 = (x71 >> 0x13); /* second, short carry pass over the two lowest limbs */
+ { uint32_t x73 = (x71 & 0x7ffff);
+ { uint32_t x74 = (x72 + x49);
+ { uint32_t x75 = (x74 >> 0x12);
+ { uint32_t x76 = (x74 & 0x3ffff);
+ out[0] = x73; /* results are stored lowest limb first */
+ out[1] = x76;
+ out[2] = (x75 + x52); /* the last carry is added without a further mask */
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e165m25/fesquare.c b/src/Specific/solinas32_2e165m25/fesquare.c
index a35d4eb6e..b20c2b582 100644
--- a/src/Specific/solinas32_2e165m25/fesquare.c
+++ b/src/Specific/solinas32_2e165m25/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x19 * ((uint64_t)x15 * x15)));
-{ uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))));
-{ uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))));
-{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
-{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
-{ uint32_t x26 = (uint32_t) (x25 >> 0x13);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7ffff);
-{ uint64_t x28 = (x26 + x24);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x12);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3ffff);
-{ uint64_t x31 = (x29 + x23);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x12);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3ffff);
-{ uint64_t x34 = (x32 + x22);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x13);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x12);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3ffff);
-{ uint64_t x40 = (x38 + x20);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x12);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3ffff);
-{ uint64_t x43 = (x41 + x19);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
-{ uint64_t x46 = (x44 + x18);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x12);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3ffff);
-{ uint64_t x49 = (x47 + x17);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x12);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3ffff);
-{ uint32_t x52 = (x27 + (0x19 * x50));
-{ uint32_t x53 = (x52 >> 0x13);
-{ uint32_t x54 = (x52 & 0x7ffff);
-{ uint32_t x55 = (x53 + x30);
-{ uint32_t x56 = (x55 >> 0x12);
-{ uint32_t x57 = (x55 & 0x3ffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
+ { uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x19 * ((uint64_t)x15 * x15)));
+ { uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))));
+ { uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))));
+ { uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+ { uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+ { uint64_t x25 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
+ { uint32_t x26 = (uint32_t) (x25 >> 0x13);
+ { uint32_t x27 = ((uint32_t)x25 & 0x7ffff);
+ { uint64_t x28 = (x26 + x24);
+ { uint32_t x29 = (uint32_t) (x28 >> 0x12);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3ffff);
+ { uint64_t x31 = (x29 + x23);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x12);
+ { uint32_t x33 = ((uint32_t)x31 & 0x3ffff);
+ { uint64_t x34 = (x32 + x22);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x13);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x12);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3ffff);
+ { uint64_t x40 = (x38 + x20);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x12);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3ffff);
+ { uint64_t x43 = (x41 + x19);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x13);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
+ { uint64_t x46 = (x44 + x18);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x12);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3ffff);
+ { uint64_t x49 = (x47 + x17);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x12);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3ffff);
+ { uint32_t x52 = (x27 + (0x19 * x50));
+ { uint32_t x53 = (x52 >> 0x13);
+ { uint32_t x54 = (x52 & 0x7ffff);
+ { uint32_t x55 = (x53 + x30);
+ { uint32_t x56 = (x55 >> 0x12);
+ { uint32_t x57 = (x55 & 0x3ffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33);
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e165m25/freeze.c b/src/Specific/solinas32_2e165m25/freeze.c
index 592c36e10..69d1a6adb 100644
--- a/src/Specific/solinas32_2e165m25/freeze.c
+++ b/src/Specific/solinas32_2e165m25/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 19 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffe7;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffe7);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0x3ffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0x3ffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0x7ffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0x3ffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0x3ffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0x7ffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0x3ffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0x3ffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff);
+ { uint32_t x45 = (x44 & 0x7ffe7);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45);
+ { uint32_t x49 = (x44 & 0x3ffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0x3ffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0x7ffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0x3ffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0x3ffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0x7ffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0x3ffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0x3ffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77);
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e166m5/femul.c b/src/Specific/solinas32_2e166m5/femul.c
index 6efdff2fa..e3fcf8b14 100644
--- a/src/Specific/solinas32_2e166m5/femul.c
+++ b/src/Specific/solinas32_2e166m5/femul.c
@@ -1,81 +1,85 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)
-{ uint64_t x44 = (((uint64_t)x5 * x42) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + ((0x2 * ((uint64_t)x17 * x33)) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((0x2 * ((uint64_t)x23 * x27)) + ((uint64_t)x22 * x25)))))))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + ((uint64_t)x23 * x25)))))))))) + (0x5 * ((uint64_t)x22 * x42)));
-{ uint64_t x46 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((uint64_t)x21 * x25))))))))) + (0x5 * (((uint64_t)x23 * x42) + ((uint64_t)x22 * x43))));
-{ uint64_t x47 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((uint64_t)x19 * x25)))))))) + (0x5 * (((uint64_t)x21 * x42) + (((uint64_t)x23 * x43) + ((uint64_t)x22 * x41)))));
-{ uint64_t x48 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((uint64_t)x17 * x25))))))) + (0x5 * (((uint64_t)x19 * x42) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + ((uint64_t)x22 * x39))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((uint64_t)x15 * x25)))))) + (0x5 * (((uint64_t)x17 * x42) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + ((uint64_t)x22 * x37)))))));
-{ uint64_t x50 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((uint64_t)x13 * x25))))) + (0x5 * (((uint64_t)x15 * x42) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((uint64_t)x22 * x35))))))));
-{ uint64_t x51 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((uint64_t)x11 * x25)))) + (0x5 * (((uint64_t)x13 * x42) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + ((uint64_t)x22 * x33)))))))));
-{ uint64_t x52 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((uint64_t)x9 * x25))) + (0x5 * (((uint64_t)x11 * x42) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + ((uint64_t)x22 * x31))))))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x27) + ((uint64_t)x7 * x25)) + (0x5 * (((uint64_t)x9 * x42) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((uint64_t)x22 * x29)))))))))));
-{ uint64_t x54 = (((uint64_t)x5 * x25) + (0x5 * ((0x2 * ((uint64_t)x7 * x42)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((0x2 * ((uint64_t)x17 * x35)) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + (0x2 * ((uint64_t)x22 * x27)))))))))))));
-{ uint32_t x55 = (uint32_t) (x54 >> 0x10);
-{ uint32_t x56 = ((uint32_t)x54 & 0xffff);
-{ uint64_t x57 = (x55 + x53);
-{ uint32_t x58 = (uint32_t) (x57 >> 0xf);
-{ uint32_t x59 = ((uint32_t)x57 & 0x7fff);
-{ uint64_t x60 = (x58 + x52);
-{ uint32_t x61 = (uint32_t) (x60 >> 0xf);
-{ uint32_t x62 = ((uint32_t)x60 & 0x7fff);
-{ uint64_t x63 = (x61 + x51);
-{ uint32_t x64 = (uint32_t) (x63 >> 0xf);
-{ uint32_t x65 = ((uint32_t)x63 & 0x7fff);
-{ uint64_t x66 = (x64 + x50);
-{ uint32_t x67 = (uint32_t) (x66 >> 0xf);
-{ uint32_t x68 = ((uint32_t)x66 & 0x7fff);
-{ uint64_t x69 = (x67 + x49);
-{ uint32_t x70 = (uint32_t) (x69 >> 0xf);
-{ uint32_t x71 = ((uint32_t)x69 & 0x7fff);
-{ uint64_t x72 = (x70 + x48);
-{ uint32_t x73 = (uint32_t) (x72 >> 0xf);
-{ uint32_t x74 = ((uint32_t)x72 & 0x7fff);
-{ uint64_t x75 = (x73 + x47);
-{ uint32_t x76 = (uint32_t) (x75 >> 0xf);
-{ uint32_t x77 = ((uint32_t)x75 & 0x7fff);
-{ uint64_t x78 = (x76 + x46);
-{ uint32_t x79 = (uint32_t) (x78 >> 0xf);
-{ uint32_t x80 = ((uint32_t)x78 & 0x7fff);
-{ uint64_t x81 = (x79 + x45);
-{ uint32_t x82 = (uint32_t) (x81 >> 0xf);
-{ uint32_t x83 = ((uint32_t)x81 & 0x7fff);
-{ uint64_t x84 = (x82 + x44);
-{ uint32_t x85 = (uint32_t) (x84 >> 0xf);
-{ uint32_t x86 = ((uint32_t)x84 & 0x7fff);
-{ uint32_t x87 = (x56 + (0x5 * x85));
-{ uint32_t x88 = (x87 >> 0x10);
-{ uint32_t x89 = (x87 & 0xffff);
-{ uint32_t x90 = (x88 + x59);
-{ uint32_t x91 = (x90 >> 0xf);
-{ uint32_t x92 = (x90 & 0x7fff);
-out[0] = x86;
-out[1] = x83;
-out[2] = x80;
-out[3] = x77;
-out[4] = x74;
-out[5] = x71;
-out[6] = x68;
-out[7] = x65;
-out[8] = x91 + x62;
-out[9] = x92;
-out[10] = x89;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void femul(uint32_t out[11], const uint32_t in1[11], const uint32_t in2[11]) {
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x42 = in2[10];
+ { const uint32_t x43 = in2[9];
+ { const uint32_t x41 = in2[8];
+ { const uint32_t x39 = in2[7];
+ { const uint32_t x37 = in2[6];
+ { const uint32_t x35 = in2[5];
+ { const uint32_t x33 = in2[4];
+ { const uint32_t x31 = in2[3];
+ { const uint32_t x29 = in2[2];
+ { const uint32_t x27 = in2[1];
+ { const uint32_t x25 = in2[0];
+ { uint64_t x44 = (((uint64_t)x5 * x42) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + ((0x2 * ((uint64_t)x17 * x33)) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((0x2 * ((uint64_t)x23 * x27)) + ((uint64_t)x22 * x25)))))))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + ((uint64_t)x23 * x25)))))))))) + (0x5 * ((uint64_t)x22 * x42)));
+ { uint64_t x46 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((uint64_t)x21 * x25))))))))) + (0x5 * (((uint64_t)x23 * x42) + ((uint64_t)x22 * x43))));
+ { uint64_t x47 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((uint64_t)x19 * x25)))))))) + (0x5 * (((uint64_t)x21 * x42) + (((uint64_t)x23 * x43) + ((uint64_t)x22 * x41)))));
+ { uint64_t x48 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((uint64_t)x17 * x25))))))) + (0x5 * (((uint64_t)x19 * x42) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + ((uint64_t)x22 * x39))))));
+ { uint64_t x49 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((uint64_t)x15 * x25)))))) + (0x5 * (((uint64_t)x17 * x42) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + ((uint64_t)x22 * x37)))))));
+ { uint64_t x50 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((uint64_t)x13 * x25))))) + (0x5 * (((uint64_t)x15 * x42) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((uint64_t)x22 * x35))))))));
+ { uint64_t x51 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((uint64_t)x11 * x25)))) + (0x5 * (((uint64_t)x13 * x42) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + ((uint64_t)x22 * x33)))))))));
+ { uint64_t x52 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((uint64_t)x9 * x25))) + (0x5 * (((uint64_t)x11 * x42) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + ((uint64_t)x22 * x31))))))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x27) + ((uint64_t)x7 * x25)) + (0x5 * (((uint64_t)x9 * x42) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((uint64_t)x22 * x29)))))))))));
+ { uint64_t x54 = (((uint64_t)x5 * x25) + (0x5 * ((0x2 * ((uint64_t)x7 * x42)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((0x2 * ((uint64_t)x17 * x35)) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + (0x2 * ((uint64_t)x22 * x27)))))))))))));
+ { uint32_t x55 = (uint32_t) (x54 >> 0x10);
+ { uint32_t x56 = ((uint32_t)x54 & 0xffff);
+ { uint64_t x57 = (x55 + x53);
+ { uint32_t x58 = (uint32_t) (x57 >> 0xf);
+ { uint32_t x59 = ((uint32_t)x57 & 0x7fff);
+ { uint64_t x60 = (x58 + x52);
+ { uint32_t x61 = (uint32_t) (x60 >> 0xf);
+ { uint32_t x62 = ((uint32_t)x60 & 0x7fff);
+ { uint64_t x63 = (x61 + x51);
+ { uint32_t x64 = (uint32_t) (x63 >> 0xf);
+ { uint32_t x65 = ((uint32_t)x63 & 0x7fff);
+ { uint64_t x66 = (x64 + x50);
+ { uint32_t x67 = (uint32_t) (x66 >> 0xf);
+ { uint32_t x68 = ((uint32_t)x66 & 0x7fff);
+ { uint64_t x69 = (x67 + x49);
+ { uint32_t x70 = (uint32_t) (x69 >> 0xf);
+ { uint32_t x71 = ((uint32_t)x69 & 0x7fff);
+ { uint64_t x72 = (x70 + x48);
+ { uint32_t x73 = (uint32_t) (x72 >> 0xf);
+ { uint32_t x74 = ((uint32_t)x72 & 0x7fff);
+ { uint64_t x75 = (x73 + x47);
+ { uint32_t x76 = (uint32_t) (x75 >> 0xf);
+ { uint32_t x77 = ((uint32_t)x75 & 0x7fff);
+ { uint64_t x78 = (x76 + x46);
+ { uint32_t x79 = (uint32_t) (x78 >> 0xf);
+ { uint32_t x80 = ((uint32_t)x78 & 0x7fff);
+ { uint64_t x81 = (x79 + x45);
+ { uint32_t x82 = (uint32_t) (x81 >> 0xf);
+ { uint32_t x83 = ((uint32_t)x81 & 0x7fff);
+ { uint64_t x84 = (x82 + x44);
+ { uint32_t x85 = (uint32_t) (x84 >> 0xf);
+ { uint32_t x86 = ((uint32_t)x84 & 0x7fff);
+ { uint32_t x87 = (x56 + (0x5 * x85));
+ { uint32_t x88 = (x87 >> 0x10);
+ { uint32_t x89 = (x87 & 0xffff);
+ { uint32_t x90 = (x88 + x59);
+ { uint32_t x91 = (x90 >> 0xf);
+ { uint32_t x92 = (x90 & 0x7fff);
+ out[0] = x89;
+ out[1] = x92;
+ out[2] = (x91 + x62);
+ out[3] = x65;
+ out[4] = x68;
+ out[5] = x71;
+ out[6] = x74;
+ out[7] = x77;
+ out[8] = x80;
+ out[9] = x83;
+ out[10] = x86;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e166m5/fesquare.c b/src/Specific/solinas32_2e166m5/fesquare.c
index b9645bca1..f26c76081 100644
--- a/src/Specific/solinas32_2e166m5/fesquare.c
+++ b/src/Specific/solinas32_2e166m5/fesquare.c
@@ -1,81 +1,74 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x21 = (((uint64_t)x2 * x19) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x19 * x2)))))))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x5 * ((uint64_t)x19 * x19)));
-{ uint64_t x23 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x5 * (((uint64_t)x20 * x19) + ((uint64_t)x19 * x20))));
-{ uint64_t x24 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x5 * (((uint64_t)x18 * x19) + (((uint64_t)x20 * x20) + ((uint64_t)x19 * x18)))));
-{ uint64_t x25 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * (((uint64_t)x16 * x19) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((uint64_t)x19 * x16))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x19) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + ((uint64_t)x19 * x14)))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x5 * (((uint64_t)x12 * x19) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + ((uint64_t)x19 * x12))))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x19) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((uint64_t)x19 * x10)))))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x19) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + ((uint64_t)x19 * x8))))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x19) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + ((uint64_t)x19 * x6)))))))))));
-{ uint64_t x31 = (((uint64_t)x2 * x2) + (0x5 * ((0x2 * ((uint64_t)x4 * x19)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + (0x2 * ((uint64_t)x19 * x4)))))))))))));
-{ uint32_t x32 = (uint32_t) (x31 >> 0x10);
-{ uint32_t x33 = ((uint32_t)x31 & 0xffff);
-{ uint64_t x34 = (x32 + x30);
-{ uint32_t x35 = (uint32_t) (x34 >> 0xf);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7fff);
-{ uint64_t x37 = (x35 + x29);
-{ uint32_t x38 = (uint32_t) (x37 >> 0xf);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7fff);
-{ uint64_t x40 = (x38 + x28);
-{ uint32_t x41 = (uint32_t) (x40 >> 0xf);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7fff);
-{ uint64_t x43 = (x41 + x27);
-{ uint32_t x44 = (uint32_t) (x43 >> 0xf);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7fff);
-{ uint64_t x46 = (x44 + x26);
-{ uint32_t x47 = (uint32_t) (x46 >> 0xf);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7fff);
-{ uint64_t x49 = (x47 + x25);
-{ uint32_t x50 = (uint32_t) (x49 >> 0xf);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7fff);
-{ uint64_t x52 = (x50 + x24);
-{ uint32_t x53 = (uint32_t) (x52 >> 0xf);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7fff);
-{ uint64_t x55 = (x53 + x23);
-{ uint32_t x56 = (uint32_t) (x55 >> 0xf);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7fff);
-{ uint64_t x58 = (x56 + x22);
-{ uint32_t x59 = (uint32_t) (x58 >> 0xf);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7fff);
-{ uint64_t x61 = (x59 + x21);
-{ uint32_t x62 = (uint32_t) (x61 >> 0xf);
-{ uint32_t x63 = ((uint32_t)x61 & 0x7fff);
-{ uint32_t x64 = (x33 + (0x5 * x62));
-{ uint32_t x65 = (x64 >> 0x10);
-{ uint32_t x66 = (x64 & 0xffff);
-{ uint32_t x67 = (x65 + x36);
-{ uint32_t x68 = (x67 >> 0xf);
-{ uint32_t x69 = (x67 & 0x7fff);
-out[0] = x63;
-out[1] = x60;
-out[2] = x57;
-out[3] = x54;
-out[4] = x51;
-out[5] = x48;
-out[6] = x45;
-out[7] = x42;
-out[8] = x68 + x39;
-out[9] = x69;
-out[10] = x66;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[11];
+static void fesquare(uint32_t out[11], const uint32_t in1[11]) { /* Fills out[0..10] from in1[0..10]. The file path (solinas32_2e166m5/fesquare.c) suggests squaring modulo 2^166 - 5 -- TODO confirm against the generator. */
+ { const uint32_t x19 = in1[10]; /* in1 is copied into const locals, from index 10 down to index 0 */
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x21 = (((uint64_t)x2 * x19) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x19 * x2))))))))))); /* x21: products in1[i]*in1[j] with i+j == 10, some doubled (0x2); the left factor is widened to uint64_t before each multiply */
+ { uint64_t x22 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x5 * ((uint64_t)x19 * x19))); /* x22..x31 share one shape: x(21+k) sums the products with i+j == 10-k and adds 0x5 times the products with i+j == 21-k */
+ { uint64_t x23 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x5 * (((uint64_t)x20 * x19) + ((uint64_t)x19 * x20))));
+ { uint64_t x24 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x5 * (((uint64_t)x18 * x19) + (((uint64_t)x20 * x20) + ((uint64_t)x19 * x18)))));
+ { uint64_t x25 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * (((uint64_t)x16 * x19) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((uint64_t)x19 * x16))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x19) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + ((uint64_t)x19 * x14)))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x5 * (((uint64_t)x12 * x19) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + ((uint64_t)x19 * x12))))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x19) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((uint64_t)x19 * x10)))))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x19) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + ((uint64_t)x19 * x8))))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x19) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + ((uint64_t)x19 * x6)))))))))));
+ { uint64_t x31 = (((uint64_t)x2 * x2) + (0x5 * ((0x2 * ((uint64_t)x4 * x19)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + (0x2 * ((uint64_t)x19 * x4))))))))))))); /* x31: in1[0]*in1[0] plus 0x5 times the doubled products with i+j == 11 */
+ { uint32_t x32 = (uint32_t) (x31 >> 0x10); /* x32: x31 shifted right by 16, truncated to 32 bits; added into the next column below */
+ { uint32_t x33 = ((uint32_t)x31 & 0xffff); /* x33: low 16 bits of x31 */
+ { uint64_t x34 = (x32 + x30); /* each step from here adds the previous carry to the next column, x30 up to x21 */
+ { uint32_t x35 = (uint32_t) (x34 >> 0xf); /* from here on the split is at 15 bits (shift 0xf, mask 0x7fff) */
+ { uint32_t x36 = ((uint32_t)x34 & 0x7fff);
+ { uint64_t x37 = (x35 + x29);
+ { uint32_t x38 = (uint32_t) (x37 >> 0xf);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7fff);
+ { uint64_t x40 = (x38 + x28);
+ { uint32_t x41 = (uint32_t) (x40 >> 0xf);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7fff);
+ { uint64_t x43 = (x41 + x27);
+ { uint32_t x44 = (uint32_t) (x43 >> 0xf);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7fff);
+ { uint64_t x46 = (x44 + x26);
+ { uint32_t x47 = (uint32_t) (x46 >> 0xf);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7fff);
+ { uint64_t x49 = (x47 + x25);
+ { uint32_t x50 = (uint32_t) (x49 >> 0xf);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7fff);
+ { uint64_t x52 = (x50 + x24);
+ { uint32_t x53 = (uint32_t) (x52 >> 0xf);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7fff);
+ { uint64_t x55 = (x53 + x23);
+ { uint32_t x56 = (uint32_t) (x55 >> 0xf);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7fff);
+ { uint64_t x58 = (x56 + x22);
+ { uint32_t x59 = (uint32_t) (x58 >> 0xf);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7fff);
+ { uint64_t x61 = (x59 + x21);
+ { uint32_t x62 = (uint32_t) (x61 >> 0xf);
+ { uint32_t x63 = ((uint32_t)x61 & 0x7fff);
+ { uint32_t x64 = (x33 + (0x5 * x62)); /* the carry out of the last column (x62) is multiplied by 0x5 and added to the lowest word x33 */
+ { uint32_t x65 = (x64 >> 0x10); /* x64 is split at 16 bits, like x31 */
+ { uint32_t x66 = (x64 & 0xffff);
+ { uint32_t x67 = (x65 + x36); /* second word plus the carry from x64, split at 15 bits below */
+ { uint32_t x68 = (x67 >> 0xf);
+ { uint32_t x69 = (x67 & 0x7fff);
+ out[0] = x66; /* results are stored lowest index first */
+ out[1] = x69;
+ out[2] = (x68 + x39); /* x68 + x39 is stored without a further mask */
+ out[3] = x42;
+ out[4] = x45;
+ out[5] = x48;
+ out[6] = x51;
+ out[7] = x54;
+ out[8] = x57;
+ out[9] = x60;
+ out[10] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} /* closes the nested scopes opened by each "{ ... = ...;" line above */
+}
diff --git a/src/Specific/solinas32_2e166m5/freeze.c b/src/Specific/solinas32_2e166m5/freeze.c
index e2c8a95fd..2d342491a 100644
--- a/src/Specific/solinas32_2e166m5/freeze.c
+++ b/src/Specific/solinas32_2e166m5/freeze.c
@@ -1,25 +1,59 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x22;
-out[1] = uint8_t x23 = Op Syntax.SubWithGetBorrow 16 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffb;;
+static void freeze(uint32_t out[11], const uint32_t in1[11]) { /* Fills out[0..10] from in1[0..10]. NOTE(review): the body is not compilable C as written -- see the Op lines and cmovznz below. */
+ { const uint32_t x19 = in1[10]; /* in1 is copied into const locals, from index 10 down to index 0 */
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 16 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xfffb); /* NOTE(review): "Op (Syntax.SubWithGetBorrow ...)" is not C syntax; presumably an un-rendered subtract-with-borrow yielding a (word, borrow) pair -- confirm with the generator. Operands: 0x0, x2, 0xfffb. */
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x4, 0x7fff); /* same shape for x4 through x19: first operand is the uint8_t result of the previous line, third operand is 0x7fff */
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x6, 0x7fff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x8, 0x7fff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x10, 0x7fff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x12, 0x7fff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x14, 0x7fff);
+ { uint32_t x43, uint8_t x44 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x16, 0x7fff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x44, Return x18, 0x7fff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x20, 0x7fff);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x19, 0x7fff);
+ { uint32_t x54 = (uint32_t)cmovznz(x53, 0x0, 0xffffffff); /* cmovznz is not defined in this block; it is passed x53 (the last uint8_t result) with 0x0 and 0xffffffff -- which value is chosen in which case is not visible here */
+ { uint32_t x55 = (x54 & 0xfffb); /* x54 masked with the same constant used for the first word above (0xfffb) */
+ { uint32_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 16 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x22, Return x55); /* NOTE(review): AddWithGetCarry is likewise not C; presumably add-with-carry of x22 and x55 yielding a (word, carry) pair -- confirm */
+ { uint32_t x59 = (x54 & 0x7fff); /* x59, x63, ..., x95 are all the same value x54 & 0x7fff, recomputed before each Op that uses it */
+ { uint32_t x61, uint8_t x62 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x25, Return x59);
+ { uint32_t x63 = (x54 & 0x7fff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x62, Return x28, Return x63);
+ { uint32_t x67 = (x54 & 0x7fff);
+ { uint32_t x69, uint8_t x70 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x31, Return x67);
+ { uint32_t x71 = (x54 & 0x7fff);
+ { uint32_t x73, uint8_t x74 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x70, Return x34, Return x71);
+ { uint32_t x75 = (x54 & 0x7fff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x74, Return x37, Return x75);
+ { uint32_t x79 = (x54 & 0x7fff);
+ { uint32_t x81, uint8_t x82 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x78, Return x40, Return x79);
+ { uint32_t x83 = (x54 & 0x7fff);
+ { uint32_t x85, uint8_t x86 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x43, Return x83);
+ { uint32_t x87 = (x54 & 0x7fff);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x86, Return x46, Return x87);
+ { uint32_t x91 = (x54 & 0x7fff);
+ { uint32_t x93, uint8_t x94 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x49, Return x91);
+ { uint32_t x95 = (x54 & 0x7fff);
+ { uint32_t x97, uint8_t _ = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x94, Return x52, Return x95); /* the last uint8_t result is named _ and is not read afterwards */
+ out[0] = x57; /* the uint32_t results of the AddWithGetCarry lines are stored in order, lowest index first */
+ out[1] = x61;
+ out[2] = x65;
+ out[3] = x69;
+ out[4] = x73;
+ out[5] = x77;
+ out[6] = x81;
+ out[7] = x85;
+ out[8] = x89;
+ out[9] = x93;
+ out[10] = x97;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} /* closes the nested scopes opened by each "{ ..." line above */
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e171m19/femul.c b/src/Specific/solinas32_2e171m19/femul.c
index 2fcdf2aad..21a912978 100644
--- a/src/Specific/solinas32_2e171m19/femul.c
+++ b/src/Specific/solinas32_2e171m19/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21)))))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x13 * ((uint64_t)x18 * x34)));
-{ uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x13 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
-{ uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x13 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
-{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x13 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x13 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x13 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
-{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x13 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
-{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x13 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));
-{ uint32_t x45 = (uint32_t) (x44 >> 0x13);
-{ uint32_t x46 = ((uint32_t)x44 & 0x7ffff);
-{ uint64_t x47 = (x45 + x43);
-{ uint32_t x48 = (uint32_t) (x47 >> 0x13);
-{ uint32_t x49 = ((uint32_t)x47 & 0x7ffff);
-{ uint64_t x50 = (x48 + x42);
-{ uint32_t x51 = (uint32_t) (x50 >> 0x13);
-{ uint32_t x52 = ((uint32_t)x50 & 0x7ffff);
-{ uint64_t x53 = (x51 + x41);
-{ uint32_t x54 = (uint32_t) (x53 >> 0x13);
-{ uint32_t x55 = ((uint32_t)x53 & 0x7ffff);
-{ uint64_t x56 = (x54 + x40);
-{ uint32_t x57 = (uint32_t) (x56 >> 0x13);
-{ uint32_t x58 = ((uint32_t)x56 & 0x7ffff);
-{ uint64_t x59 = (x57 + x39);
-{ uint32_t x60 = (uint32_t) (x59 >> 0x13);
-{ uint32_t x61 = ((uint32_t)x59 & 0x7ffff);
-{ uint64_t x62 = (x60 + x38);
-{ uint32_t x63 = (uint32_t) (x62 >> 0x13);
-{ uint32_t x64 = ((uint32_t)x62 & 0x7ffff);
-{ uint64_t x65 = (x63 + x37);
-{ uint32_t x66 = (uint32_t) (x65 >> 0x13);
-{ uint32_t x67 = ((uint32_t)x65 & 0x7ffff);
-{ uint64_t x68 = (x66 + x36);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x13);
-{ uint32_t x70 = ((uint32_t)x68 & 0x7ffff);
-{ uint32_t x71 = (x46 + (0x13 * x69));
-{ uint32_t x72 = (x71 >> 0x13);
-{ uint32_t x73 = (x71 & 0x7ffff);
-{ uint32_t x74 = (x72 + x49);
-{ uint32_t x75 = (x74 >> 0x13);
-{ uint32_t x76 = (x74 & 0x7ffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) { /* Fills out[0..8] from in1[0..8] and in2[0..8]. The file path (solinas32_2e171m19/femul.c) suggests multiplication modulo 2^171 - 19 -- TODO confirm against the generator. */
+ { const uint32_t x18 = in1[8]; /* in1 is copied into const locals, from index 8 down to index 0 */
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8]; /* in2 is copied the same way, from index 8 down to index 0 */
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21))))))))); /* x36: products in1[i]*in2[j] with i+j == 8; the in1 factor is widened to uint64_t before each multiply */
+ { uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x13 * ((uint64_t)x18 * x34))); /* x37..x44 share one shape: x(36+k) sums the products with i+j == 8-k and adds 0x13 times the products with i+j == 17-k */
+ { uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x13 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+ { uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x13 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+ { uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x13 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x13 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x13 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+ { uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x13 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+ { uint64_t x44 = (((uint64_t)x5 * x21) + (0x13 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23)))))))))); /* x44: in1[0]*in2[0] plus 0x13 times the products with i+j == 9 */
+ { uint32_t x45 = (uint32_t) (x44 >> 0x13); /* every split in this function is at 19 bits (shift 0x13, mask 0x7ffff); 0x13 is also the multiplier used above */
+ { uint32_t x46 = ((uint32_t)x44 & 0x7ffff); /* x46: low 19 bits of x44 */
+ { uint64_t x47 = (x45 + x43); /* each step from here adds the previous carry to the next column, x43 up to x36 */
+ { uint32_t x48 = (uint32_t) (x47 >> 0x13);
+ { uint32_t x49 = ((uint32_t)x47 & 0x7ffff);
+ { uint64_t x50 = (x48 + x42);
+ { uint32_t x51 = (uint32_t) (x50 >> 0x13);
+ { uint32_t x52 = ((uint32_t)x50 & 0x7ffff);
+ { uint64_t x53 = (x51 + x41);
+ { uint32_t x54 = (uint32_t) (x53 >> 0x13);
+ { uint32_t x55 = ((uint32_t)x53 & 0x7ffff);
+ { uint64_t x56 = (x54 + x40);
+ { uint32_t x57 = (uint32_t) (x56 >> 0x13);
+ { uint32_t x58 = ((uint32_t)x56 & 0x7ffff);
+ { uint64_t x59 = (x57 + x39);
+ { uint32_t x60 = (uint32_t) (x59 >> 0x13);
+ { uint32_t x61 = ((uint32_t)x59 & 0x7ffff);
+ { uint64_t x62 = (x60 + x38);
+ { uint32_t x63 = (uint32_t) (x62 >> 0x13);
+ { uint32_t x64 = ((uint32_t)x62 & 0x7ffff);
+ { uint64_t x65 = (x63 + x37);
+ { uint32_t x66 = (uint32_t) (x65 >> 0x13);
+ { uint32_t x67 = ((uint32_t)x65 & 0x7ffff);
+ { uint64_t x68 = (x66 + x36);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x13);
+ { uint32_t x70 = ((uint32_t)x68 & 0x7ffff);
+ { uint32_t x71 = (x46 + (0x13 * x69)); /* the carry out of the last column (x69) is multiplied by 0x13 and added to the lowest word x46 */
+ { uint32_t x72 = (x71 >> 0x13);
+ { uint32_t x73 = (x71 & 0x7ffff);
+ { uint32_t x74 = (x72 + x49); /* second word plus the carry from x71 */
+ { uint32_t x75 = (x74 >> 0x13);
+ { uint32_t x76 = (x74 & 0x7ffff);
+ out[0] = x73; /* results are stored lowest index first */
+ out[1] = x76;
+ out[2] = (x75 + x52); /* x75 + x52 is stored without a further mask */
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} /* closes the nested scopes opened by each "{ ... = ...;" line above */
+}
diff --git a/src/Specific/solinas32_2e171m19/fesquare.c b/src/Specific/solinas32_2e171m19/fesquare.c
index 02e4e804e..7c664a241 100644
--- a/src/Specific/solinas32_2e171m19/fesquare.c
+++ b/src/Specific/solinas32_2e171m19/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2)))))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x13 * ((uint64_t)x15 * x15)));
-{ uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
-{ uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
-{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x13 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
-{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x13 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
-{ uint32_t x26 = (uint32_t) (x25 >> 0x13);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7ffff);
-{ uint64_t x28 = (x26 + x24);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x13);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7ffff);
-{ uint64_t x31 = (x29 + x23);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x13);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
-{ uint64_t x34 = (x32 + x22);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x13);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x13);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
-{ uint64_t x40 = (x38 + x20);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x13);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
-{ uint64_t x43 = (x41 + x19);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
-{ uint64_t x46 = (x44 + x18);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
-{ uint64_t x49 = (x47 + x17);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x13);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
-{ uint32_t x52 = (x27 + (0x13 * x50));
-{ uint32_t x53 = (x52 >> 0x13);
-{ uint32_t x54 = (x52 & 0x7ffff);
-{ uint32_t x55 = (x53 + x30);
-{ uint32_t x56 = (x55 >> 0x13);
-{ uint32_t x57 = (x55 & 0x7ffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) { /* Combines the nine words of in1 into nine result words in out. Presumably field squaring for the 2^171 - 19 parameter set named in the file path -- TODO confirm. */
+ { const uint32_t x15 = in1[8]; /* in1 is read from index 8 down to index 0 */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2))))))))); /* x17..x25: x(17+k) sums the products whose in1 indices add up to 8-k, plus 0x13 times the products whose indices add up to 17-k */
+ { uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x13 * ((uint64_t)x15 * x15)));
+ { uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+ { uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+ { uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x13 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+ { uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+ { uint64_t x25 = (((uint64_t)x2 * x2) + (0x13 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
+ { uint32_t x26 = (uint32_t) (x25 >> 0x13); /* x26: x25 shifted right by 0x13 (19) bits; it is carried into the next accumulator */
+ { uint32_t x27 = ((uint32_t)x25 & 0x7ffff); /* x27: low 19 bits of x25 */
+ { uint64_t x28 = (x26 + x24); /* the same add / shift / mask step repeats for x24 down to x17 */
+ { uint32_t x29 = (uint32_t) (x28 >> 0x13);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7ffff);
+ { uint64_t x31 = (x29 + x23);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x13);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
+ { uint64_t x34 = (x32 + x22);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x13);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x13);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
+ { uint64_t x40 = (x38 + x20);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x13);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
+ { uint64_t x43 = (x41 + x19);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x13);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
+ { uint64_t x46 = (x44 + x18);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x13);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+ { uint64_t x49 = (x47 + x17);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x13);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+ { uint32_t x52 = (x27 + (0x13 * x50)); /* the carry out of the last accumulator (x50) is scaled by 0x13 and added to the lowest word */
+ { uint32_t x53 = (x52 >> 0x13);
+ { uint32_t x54 = (x52 & 0x7ffff);
+ { uint32_t x55 = (x53 + x30); /* second, shorter carry pass over the two lowest words */
+ { uint32_t x56 = (x55 >> 0x13);
+ { uint32_t x57 = (x55 & 0x7ffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33); /* carry x56 is added here without a further mask */
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e171m19/freeze.c b/src/Specific/solinas32_2e171m19/freeze.c
index 433ddce0e..857956aaf 100644
--- a/src/Specific/solinas32_2e171m19/freeze.c
+++ b/src/Specific/solinas32_2e171m19/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 19 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffed;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) { /* Reads in1[0..8] and writes out[0..8]. Presumably the final canonical reduction for this parameter set -- TODO confirm. */
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffed); /* NOTE(review): this two-type declaration with an "Op (Syntax.SubWithGetBorrow ...)" right-hand side is not valid C; it looks like an operation the generator printed without lowering it -- confirm before compiling. The constant 0x7ffed equals 2^19 - 19. */
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0x7ffff); /* the uint8_t result of the previous step (x19) is the first argument here; every later step chains the preceding one the same way, with constant 0x7ffff */
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0x7ffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0x7ffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0x7ffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0x7ffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0x7ffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0x7ffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0x7ffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff); /* x44 is built by cmovznz from x43 and the constants 0x0 / 0xffffffff; cmovznz is not defined in this hunk, presumably it yields the second constant when x43 is nonzero -- TODO confirm */
+ { uint32_t x45 = (x44 & 0x7ffed); /* x45: the constant 0x7ffed ANDed with x44 */
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45); /* NOTE(review): same non-C "Op (...)" form as above. This second chain takes each earlier result (x18, x21, ...) together with a constant ANDed with x44 */
+ { uint32_t x49 = (x44 & 0x7ffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0x7ffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0x7ffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0x7ffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0x7ffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0x7ffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0x7ffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0x7ffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77); /* the uint8_t result of the last step is bound to "_" and never read */
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e174m17/femul.c b/src/Specific/solinas32_2e174m17/femul.c
index f8629dead..0686649fc 100644
--- a/src/Specific/solinas32_2e174m17/femul.c
+++ b/src/Specific/solinas32_2e174m17/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x11 * ((uint64_t)x18 * x34)));
-{ uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0x11 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))));
-{ uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0x11 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))));
-{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x11 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0x11 * ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))));
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0x11 * (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))));
-{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x11 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
-{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x11 * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
-{ uint32_t x45 = (uint32_t) (x44 >> 0x14);
-{ uint32_t x46 = ((uint32_t)x44 & 0xfffff);
-{ uint64_t x47 = (x45 + x43);
-{ uint32_t x48 = (uint32_t) (x47 >> 0x13);
-{ uint32_t x49 = ((uint32_t)x47 & 0x7ffff);
-{ uint64_t x50 = (x48 + x42);
-{ uint32_t x51 = (uint32_t) (x50 >> 0x13);
-{ uint32_t x52 = ((uint32_t)x50 & 0x7ffff);
-{ uint64_t x53 = (x51 + x41);
-{ uint32_t x54 = (uint32_t) (x53 >> 0x14);
-{ uint32_t x55 = ((uint32_t)x53 & 0xfffff);
-{ uint64_t x56 = (x54 + x40);
-{ uint32_t x57 = (uint32_t) (x56 >> 0x13);
-{ uint32_t x58 = ((uint32_t)x56 & 0x7ffff);
-{ uint64_t x59 = (x57 + x39);
-{ uint32_t x60 = (uint32_t) (x59 >> 0x13);
-{ uint32_t x61 = ((uint32_t)x59 & 0x7ffff);
-{ uint64_t x62 = (x60 + x38);
-{ uint32_t x63 = (uint32_t) (x62 >> 0x14);
-{ uint32_t x64 = ((uint32_t)x62 & 0xfffff);
-{ uint64_t x65 = (x63 + x37);
-{ uint32_t x66 = (uint32_t) (x65 >> 0x13);
-{ uint32_t x67 = ((uint32_t)x65 & 0x7ffff);
-{ uint64_t x68 = (x66 + x36);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x13);
-{ uint32_t x70 = ((uint32_t)x68 & 0x7ffff);
-{ uint32_t x71 = (x46 + (0x11 * x69));
-{ uint32_t x72 = (x71 >> 0x14);
-{ uint32_t x73 = (x71 & 0xfffff);
-{ uint32_t x74 = (x72 + x49);
-{ uint32_t x75 = (x74 >> 0x13);
-{ uint32_t x76 = (x74 & 0x7ffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) { /* Combines the nine words of in1 with the nine words of in2 into nine result words in out. Presumably field multiplication for the 2^174 - 17 parameter set named in the file path -- TODO confirm. */
+ { const uint32_t x18 = in1[8]; /* in1 is read from index 8 down to index 0 */
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8]; /* in2 is read from index 8 down to index 0 */
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21))))))))); /* x36..x44: x(36+k) sums in1[i] * in2[j] with i + j == 8-k, plus 0x11 times the products with i + j == 17-k; some terms carry an extra factor 0x2, presumably because the words are not all the same width -- TODO confirm */
+ { uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x11 * ((uint64_t)x18 * x34)));
+ { uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0x11 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))));
+ { uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0x11 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))));
+ { uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x11 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0x11 * ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))));
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0x11 * (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))));
+ { uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x11 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+ { uint64_t x44 = (((uint64_t)x5 * x21) + (0x11 * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
+ { uint32_t x45 = (uint32_t) (x44 >> 0x14); /* carry chain: the shift widths used from here on run 20, 19, 19, 20, 19, 19, 20, 19, 19 bits */
+ { uint32_t x46 = ((uint32_t)x44 & 0xfffff); /* x46: low 20 bits of x44 */
+ { uint64_t x47 = (x45 + x43); /* each carry is added into the next accumulator before it is split in turn */
+ { uint32_t x48 = (uint32_t) (x47 >> 0x13);
+ { uint32_t x49 = ((uint32_t)x47 & 0x7ffff);
+ { uint64_t x50 = (x48 + x42);
+ { uint32_t x51 = (uint32_t) (x50 >> 0x13);
+ { uint32_t x52 = ((uint32_t)x50 & 0x7ffff);
+ { uint64_t x53 = (x51 + x41);
+ { uint32_t x54 = (uint32_t) (x53 >> 0x14);
+ { uint32_t x55 = ((uint32_t)x53 & 0xfffff);
+ { uint64_t x56 = (x54 + x40);
+ { uint32_t x57 = (uint32_t) (x56 >> 0x13);
+ { uint32_t x58 = ((uint32_t)x56 & 0x7ffff);
+ { uint64_t x59 = (x57 + x39);
+ { uint32_t x60 = (uint32_t) (x59 >> 0x13);
+ { uint32_t x61 = ((uint32_t)x59 & 0x7ffff);
+ { uint64_t x62 = (x60 + x38);
+ { uint32_t x63 = (uint32_t) (x62 >> 0x14);
+ { uint32_t x64 = ((uint32_t)x62 & 0xfffff);
+ { uint64_t x65 = (x63 + x37);
+ { uint32_t x66 = (uint32_t) (x65 >> 0x13);
+ { uint32_t x67 = ((uint32_t)x65 & 0x7ffff);
+ { uint64_t x68 = (x66 + x36);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x13);
+ { uint32_t x70 = ((uint32_t)x68 & 0x7ffff);
+ { uint32_t x71 = (x46 + (0x11 * x69)); /* the carry out of the last accumulator (x69) is scaled by 0x11 and added to the lowest word */
+ { uint32_t x72 = (x71 >> 0x14);
+ { uint32_t x73 = (x71 & 0xfffff);
+ { uint32_t x74 = (x72 + x49); /* second, shorter carry pass over the two lowest words */
+ { uint32_t x75 = (x74 >> 0x13);
+ { uint32_t x76 = (x74 & 0x7ffff);
+ out[0] = x73;
+ out[1] = x76;
+ out[2] = (x75 + x52); /* carry x75 is added here without a further mask */
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e174m17/fesquare.c b/src/Specific/solinas32_2e174m17/fesquare.c
index 1e1c5913c..e1ee9c61b 100644
--- a/src/Specific/solinas32_2e174m17/fesquare.c
+++ b/src/Specific/solinas32_2e174m17/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * ((uint64_t)x15 * x15)));
-{ uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x11 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))));
-{ uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))));
-{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x11 * ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
-{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x11 * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
-{ uint32_t x26 = (uint32_t) (x25 >> 0x14);
-{ uint32_t x27 = ((uint32_t)x25 & 0xfffff);
-{ uint64_t x28 = (x26 + x24);
-{ uint32_t x29 = (uint32_t) (x28 >> 0x13);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7ffff);
-{ uint64_t x31 = (x29 + x23);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x13);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
-{ uint64_t x34 = (x32 + x22);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x14);
-{ uint32_t x36 = ((uint32_t)x34 & 0xfffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x13);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
-{ uint64_t x40 = (x38 + x20);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x13);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
-{ uint64_t x43 = (x41 + x19);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x14);
-{ uint32_t x45 = ((uint32_t)x43 & 0xfffff);
-{ uint64_t x46 = (x44 + x18);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
-{ uint64_t x49 = (x47 + x17);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x13);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
-{ uint32_t x52 = (x27 + (0x11 * x50));
-{ uint32_t x53 = (x52 >> 0x14);
-{ uint32_t x54 = (x52 & 0xfffff);
-{ uint32_t x55 = (x53 + x30);
-{ uint32_t x56 = (x55 >> 0x13);
-{ uint32_t x57 = (x55 & 0x7ffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) { /* Combines the nine words of in1 into nine result words in out. Presumably field squaring for the 2^174 - 17 parameter set named in the file path -- TODO confirm. */
+ { const uint32_t x15 = in1[8]; /* in1 is read from index 8 down to index 0 */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2))))))))); /* x17..x25: x(17+k) sums the products whose in1 indices add up to 8-k, plus 0x11 times the products whose indices add up to 17-k; some terms carry an extra factor 0x2, presumably because the words are not all the same width -- TODO confirm */
+ { uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * ((uint64_t)x15 * x15)));
+ { uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x11 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))));
+ { uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))));
+ { uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+ { uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x11 * ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+ { uint64_t x25 = (((uint64_t)x2 * x2) + (0x11 * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
+ { uint32_t x26 = (uint32_t) (x25 >> 0x14); /* carry chain: the shift widths used from here on run 20, 19, 19, 20, 19, 19, 20, 19, 19 bits */
+ { uint32_t x27 = ((uint32_t)x25 & 0xfffff); /* x27: low 20 bits of x25 */
+ { uint64_t x28 = (x26 + x24); /* each carry is added into the next accumulator before it is split in turn */
+ { uint32_t x29 = (uint32_t) (x28 >> 0x13);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7ffff);
+ { uint64_t x31 = (x29 + x23);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x13);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
+ { uint64_t x34 = (x32 + x22);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x14);
+ { uint32_t x36 = ((uint32_t)x34 & 0xfffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x13);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
+ { uint64_t x40 = (x38 + x20);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x13);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
+ { uint64_t x43 = (x41 + x19);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x14);
+ { uint32_t x45 = ((uint32_t)x43 & 0xfffff);
+ { uint64_t x46 = (x44 + x18);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x13);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+ { uint64_t x49 = (x47 + x17);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x13);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+ { uint32_t x52 = (x27 + (0x11 * x50)); /* the carry out of the last accumulator (x50) is scaled by 0x11 and added to the lowest word */
+ { uint32_t x53 = (x52 >> 0x14);
+ { uint32_t x54 = (x52 & 0xfffff);
+ { uint32_t x55 = (x53 + x30); /* second, shorter carry pass over the two lowest words */
+ { uint32_t x56 = (x55 >> 0x13);
+ { uint32_t x57 = (x55 & 0x7ffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33); /* carry x56 is added here without a further mask */
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e174m17/freeze.c b/src/Specific/solinas32_2e174m17/freeze.c
index 2db75709d..30bc800c9 100644
--- a/src/Specific/solinas32_2e174m17/freeze.c
+++ b/src/Specific/solinas32_2e174m17/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 20 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffef;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xfffef);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0x7ffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0x7ffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0xfffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0x7ffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0x7ffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0xfffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0x7ffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0x7ffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff);
+ { uint32_t x45 = (x44 & 0xfffef);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45);
+ { uint32_t x49 = (x44 & 0x7ffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0x7ffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0xfffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0x7ffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0x7ffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0xfffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0x7ffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0x7ffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77);
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e174m3/femul.c b/src/Specific/solinas32_2e174m3/femul.c
index 509725888..3ad886b74 100644
--- a/src/Specific/solinas32_2e174m3/femul.c
+++ b/src/Specific/solinas32_2e174m3/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ ℤ x24 = (((uint64_t)x5 * x22) +ℤ (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
-{ ℤ x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) +ℤ (0x3 * ((uint64_t)x12 * x22)));
-{ ℤ x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
-{ ℤ x27 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + ((uint64_t)x9 * x15))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21)))));
-{ ℤ x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
-{ ℤ x29 = (((uint64_t)x5 * x15) +ℤ (0x3 *ℤ (((uint64_t)x7 * x22) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x12 * x17)))))));
-{ uint64_t x30 = (x29 >> 0x1d);
-{ uint32_t x31 = (x29 & 0x1fffffff);
-{ ℤ x32 = (x30 +ℤ x28);
-{ uint64_t x33 = (x32 >> 0x1d);
-{ uint32_t x34 = (x32 & 0x1fffffff);
-{ ℤ x35 = (x33 +ℤ x27);
-{ uint64_t x36 = (x35 >> 0x1d);
-{ uint32_t x37 = (x35 & 0x1fffffff);
-{ ℤ x38 = (x36 +ℤ x26);
-{ uint64_t x39 = (x38 >> 0x1d);
-{ uint32_t x40 = (x38 & 0x1fffffff);
-{ ℤ x41 = (x39 +ℤ x25);
-{ uint64_t x42 = (x41 >> 0x1d);
-{ uint32_t x43 = (x41 & 0x1fffffff);
-{ ℤ x44 = (x42 +ℤ x24);
-{ uint64_t x45 = (x44 >> 0x1d);
-{ uint32_t x46 = (x44 & 0x1fffffff);
-{ uint64_t x47 = (x31 + (0x3 * x45));
-{ uint32_t x48 = (uint32_t) (x47 >> 0x1d);
-{ uint32_t x49 = ((uint32_t)x47 & 0x1fffffff);
-{ uint32_t x50 = (x48 + x34);
-{ uint32_t x51 = (x50 >> 0x1d);
-{ uint32_t x52 = (x50 & 0x1fffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint32_t out[6], const uint32_t in1[6], const uint32_t in2[6]) {
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x22 = in2[5];
+ { const uint32_t x23 = in2[4];
+ { const uint32_t x21 = in2[3];
+ { const uint32_t x19 = in2[2];
+ { const uint32_t x17 = in2[1];
+ { const uint32_t x15 = in2[0];
+ { ℤ x24 = (((uint64_t)x5 * x22) +ℤ (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
+ { ℤ x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) +ℤ (0x3 * ((uint64_t)x12 * x22)));
+ { ℤ x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+ { ℤ x27 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + ((uint64_t)x9 * x15))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21)))));
+ { ℤ x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+ { ℤ x29 = (((uint64_t)x5 * x15) +ℤ (0x3 *ℤ (((uint64_t)x7 * x22) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x12 * x17)))))));
+ { uint64_t x30 = (x29 >> 0x1d);
+ { uint32_t x31 = (x29 & 0x1fffffff);
+ { ℤ x32 = (x30 +ℤ x28);
+ { uint64_t x33 = (x32 >> 0x1d);
+ { uint32_t x34 = (x32 & 0x1fffffff);
+ { ℤ x35 = (x33 +ℤ x27);
+ { uint64_t x36 = (x35 >> 0x1d);
+ { uint32_t x37 = (x35 & 0x1fffffff);
+ { ℤ x38 = (x36 +ℤ x26);
+ { uint64_t x39 = (x38 >> 0x1d);
+ { uint32_t x40 = (x38 & 0x1fffffff);
+ { ℤ x41 = (x39 +ℤ x25);
+ { uint64_t x42 = (x41 >> 0x1d);
+ { uint32_t x43 = (x41 & 0x1fffffff);
+ { ℤ x44 = (x42 +ℤ x24);
+ { uint64_t x45 = (x44 >> 0x1d);
+ { uint32_t x46 = (x44 & 0x1fffffff);
+ { uint64_t x47 = (x31 + (0x3 * x45));
+ { uint32_t x48 = (uint32_t) (x47 >> 0x1d);
+ { uint32_t x49 = ((uint32_t)x47 & 0x1fffffff);
+ { uint32_t x50 = (x48 + x34);
+ { uint32_t x51 = (x50 >> 0x1d);
+ { uint32_t x52 = (x50 & 0x1fffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e174m3/fesquare.c b/src/Specific/solinas32_2e174m3/fesquare.c
index 6d8132a1a..043ae7ba3 100644
--- a/src/Specific/solinas32_2e174m3/fesquare.c
+++ b/src/Specific/solinas32_2e174m3/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ ℤ x11 = (((uint64_t)x2 * x9) +ℤ (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
-{ ℤ x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0x3 * ((uint64_t)x9 * x9)));
-{ ℤ x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
-{ ℤ x14 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8)))));
-{ ℤ x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
-{ ℤ x16 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x9) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((uint64_t)x9 * x4)))))));
-{ uint64_t x17 = (x16 >> 0x1d);
-{ uint32_t x18 = (x16 & 0x1fffffff);
-{ ℤ x19 = (x17 +ℤ x15);
-{ uint64_t x20 = (x19 >> 0x1d);
-{ uint32_t x21 = (x19 & 0x1fffffff);
-{ ℤ x22 = (x20 +ℤ x14);
-{ uint64_t x23 = (x22 >> 0x1d);
-{ uint32_t x24 = (x22 & 0x1fffffff);
-{ ℤ x25 = (x23 +ℤ x13);
-{ uint64_t x26 = (x25 >> 0x1d);
-{ uint32_t x27 = (x25 & 0x1fffffff);
-{ ℤ x28 = (x26 +ℤ x12);
-{ uint64_t x29 = (x28 >> 0x1d);
-{ uint32_t x30 = (x28 & 0x1fffffff);
-{ ℤ x31 = (x29 +ℤ x11);
-{ uint64_t x32 = (x31 >> 0x1d);
-{ uint32_t x33 = (x31 & 0x1fffffff);
-{ uint64_t x34 = (x18 + (0x3 * x32));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x1d);
-{ uint32_t x36 = ((uint32_t)x34 & 0x1fffffff);
-{ uint32_t x37 = (x35 + x21);
-{ uint32_t x38 = (x37 >> 0x1d);
-{ uint32_t x39 = (x37 & 0x1fffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { ℤ x11 = (((uint64_t)x2 * x9) +ℤ (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
+ { ℤ x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0x3 * ((uint64_t)x9 * x9)));
+ { ℤ x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+ { ℤ x14 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8)))));
+ { ℤ x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+ { ℤ x16 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x9) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((uint64_t)x9 * x4)))))));
+ { uint64_t x17 = (x16 >> 0x1d);
+ { uint32_t x18 = (x16 & 0x1fffffff);
+ { ℤ x19 = (x17 +ℤ x15);
+ { uint64_t x20 = (x19 >> 0x1d);
+ { uint32_t x21 = (x19 & 0x1fffffff);
+ { ℤ x22 = (x20 +ℤ x14);
+ { uint64_t x23 = (x22 >> 0x1d);
+ { uint32_t x24 = (x22 & 0x1fffffff);
+ { ℤ x25 = (x23 +ℤ x13);
+ { uint64_t x26 = (x25 >> 0x1d);
+ { uint32_t x27 = (x25 & 0x1fffffff);
+ { ℤ x28 = (x26 +ℤ x12);
+ { uint64_t x29 = (x28 >> 0x1d);
+ { uint32_t x30 = (x28 & 0x1fffffff);
+ { ℤ x31 = (x29 +ℤ x11);
+ { uint64_t x32 = (x31 >> 0x1d);
+ { uint32_t x33 = (x31 & 0x1fffffff);
+ { uint64_t x34 = (x18 + (0x3 * x32));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x1d);
+ { uint32_t x36 = ((uint32_t)x34 & 0x1fffffff);
+ { uint32_t x37 = (x35 + x21);
+ { uint32_t x38 = (x37 >> 0x1d);
+ { uint32_t x39 = (x37 & 0x1fffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e174m3/freeze.c b/src/Specific/solinas32_2e174m3/freeze.c
index 4dd93cd80..6132d303e 100644
--- a/src/Specific/solinas32_2e174m3/freeze.c
+++ b/src/Specific/solinas32_2e174m3/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffffd;;
+static void freeze(uint32_t out[6], const uint32_t in1[6]) {
+ { const uint32_t x9 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffd);
+ { uint32_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x13, Return x4, 0x1fffffff);
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x16, Return x6, 0x1fffffff);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x8, 0x1fffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x10, 0x1fffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x9, 0x1fffffff);
+ { uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);
+ { uint32_t x30 = (x29 & 0x1ffffffd);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint32_t x34 = (x29 & 0x1fffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint32_t x38 = (x29 & 0x1fffffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint32_t x42 = (x29 & 0x1fffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint32_t x46 = (x29 & 0x1fffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint32_t x50 = (x29 & 0x1fffffff);
+ { uint32_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e189m25/femul.c b/src/Specific/solinas32_2e189m25/femul.c
index 498964086..9da02e886 100644
--- a/src/Specific/solinas32_2e189m25/femul.c
+++ b/src/Specific/solinas32_2e189m25/femul.c
@@ -1,61 +1,57 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x28 = (((uint64_t)x5 * x26) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + (((uint64_t)x15 * x19) + ((uint64_t)x14 * x17)))))));
-{ uint64_t x29 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x15 * x17)))))) + (0x19 * ((uint64_t)x14 * x26)));
-{ uint64_t x30 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((uint64_t)x13 * x17))))) + (0x19 * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
-{ uint64_t x31 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + ((uint64_t)x11 * x17)))) + (0x19 * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
-{ ℤ x32 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + ((uint64_t)x9 * x17))) +ℤ (0x19 *ℤ (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
-{ ℤ x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) +ℤ (0x19 *ℤ (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
-{ ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0x19 *ℤ (((uint64_t)x7 * x26) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x14 * x19))))))));
-{ uint64_t x35 = (x34 >> 0x1b);
-{ uint32_t x36 = (x34 & 0x7ffffff);
-{ ℤ x37 = (x35 +ℤ x33);
-{ uint64_t x38 = (x37 >> 0x1b);
-{ uint32_t x39 = (x37 & 0x7ffffff);
-{ ℤ x40 = (x38 +ℤ x32);
-{ uint64_t x41 = (x40 >> 0x1b);
-{ uint32_t x42 = (x40 & 0x7ffffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint64_t x44 = (x43 >> 0x1b);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint64_t x47 = (x46 >> 0x1b);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint64_t x50 = (x49 >> 0x1b);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x1b);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
-{ uint64_t x55 = (x36 + (0x19 * x53));
-{ uint32_t x56 = (uint32_t) (x55 >> 0x1b);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7ffffff);
-{ uint32_t x58 = (x56 + x39);
-{ uint32_t x59 = (x58 >> 0x1b);
-{ uint32_t x60 = (x58 & 0x7ffffff);
-out[0] = x54;
-out[1] = x51;
-out[2] = x48;
-out[3] = x45;
-out[4] = x59 + x42;
-out[5] = x60;
-out[6] = x57;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void femul(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint64_t x28 = (((uint64_t)x5 * x26) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + (((uint64_t)x15 * x19) + ((uint64_t)x14 * x17)))))));
+ { uint64_t x29 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x15 * x17)))))) + (0x19 * ((uint64_t)x14 * x26)));
+ { uint64_t x30 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((uint64_t)x13 * x17))))) + (0x19 * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
+ { uint64_t x31 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + ((uint64_t)x11 * x17)))) + (0x19 * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
+ { ℤ x32 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + ((uint64_t)x9 * x17))) +ℤ (0x19 *ℤ (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
+ { ℤ x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) +ℤ (0x19 *ℤ (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
+ { ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0x19 *ℤ (((uint64_t)x7 * x26) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x14 * x19))))))));
+ { uint64_t x35 = (x34 >> 0x1b);
+ { uint32_t x36 = (x34 & 0x7ffffff);
+ { ℤ x37 = (x35 +ℤ x33);
+ { uint64_t x38 = (x37 >> 0x1b);
+ { uint32_t x39 = (x37 & 0x7ffffff);
+ { ℤ x40 = (x38 +ℤ x32);
+ { uint64_t x41 = (x40 >> 0x1b);
+ { uint32_t x42 = (x40 & 0x7ffffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint64_t x44 = (x43 >> 0x1b);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint64_t x47 = (x46 >> 0x1b);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint64_t x50 = (x49 >> 0x1b);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x1b);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+ { uint64_t x55 = (x36 + (0x19 * x53));
+ { uint32_t x56 = (uint32_t) (x55 >> 0x1b);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7ffffff);
+ { uint32_t x58 = (x56 + x39);
+ { uint32_t x59 = (x58 >> 0x1b);
+ { uint32_t x60 = (x58 & 0x7ffffff);
+ out[0] = x57;
+ out[1] = x60;
+ out[2] = (x59 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e189m25/fesquare.c b/src/Specific/solinas32_2e189m25/fesquare.c
index c3ba4208b..2d8ae4b5a 100644
--- a/src/Specific/solinas32_2e189m25/fesquare.c
+++ b/src/Specific/solinas32_2e189m25/fesquare.c
@@ -1,61 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (((uint64_t)x2 * x11) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x11 * x2)))))));
-{ uint64_t x14 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x19 * ((uint64_t)x11 * x11)));
-{ uint64_t x15 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
-{ uint64_t x16 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
-{ ℤ x17 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x19 *ℤ (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
-{ ℤ x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x19 *ℤ (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
-{ ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0x19 *ℤ (((uint64_t)x4 * x11) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x11 * x4))))))));
-{ uint64_t x20 = (x19 >> 0x1b);
-{ uint32_t x21 = (x19 & 0x7ffffff);
-{ ℤ x22 = (x20 +ℤ x18);
-{ uint64_t x23 = (x22 >> 0x1b);
-{ uint32_t x24 = (x22 & 0x7ffffff);
-{ ℤ x25 = (x23 +ℤ x17);
-{ uint64_t x26 = (x25 >> 0x1b);
-{ uint32_t x27 = (x25 & 0x7ffffff);
-{ uint64_t x28 = (x26 + x16);
-{ uint64_t x29 = (x28 >> 0x1b);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
-{ uint64_t x31 = (x29 + x15);
-{ uint64_t x32 = (x31 >> 0x1b);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
-{ uint64_t x34 = (x32 + x14);
-{ uint64_t x35 = (x34 >> 0x1b);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
-{ uint64_t x37 = (x35 + x13);
-{ uint64_t x38 = (x37 >> 0x1b);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
-{ uint64_t x40 = (x21 + (0x19 * x38));
-{ uint32_t x41 = (uint32_t) (x40 >> 0x1b);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
-{ uint32_t x43 = (x41 + x24);
-{ uint32_t x44 = (x43 >> 0x1b);
-{ uint32_t x45 = (x43 & 0x7ffffff);
-out[0] = x39;
-out[1] = x36;
-out[2] = x33;
-out[3] = x30;
-out[4] = x44 + x27;
-out[5] = x45;
-out[6] = x42;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void fesquare(uint32_t out[7], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x13 = (((uint64_t)x2 * x11) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x11 * x2)))))));
+ { uint64_t x14 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x19 * ((uint64_t)x11 * x11)));
+ { uint64_t x15 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
+ { uint64_t x16 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
+ { ℤ x17 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x19 *ℤ (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
+ { ℤ x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x19 *ℤ (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
+ { ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0x19 *ℤ (((uint64_t)x4 * x11) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x11 * x4))))))));
+ { uint64_t x20 = (x19 >> 0x1b);
+ { uint32_t x21 = (x19 & 0x7ffffff);
+ { ℤ x22 = (x20 +ℤ x18);
+ { uint64_t x23 = (x22 >> 0x1b);
+ { uint32_t x24 = (x22 & 0x7ffffff);
+ { ℤ x25 = (x23 +ℤ x17);
+ { uint64_t x26 = (x25 >> 0x1b);
+ { uint32_t x27 = (x25 & 0x7ffffff);
+ { uint64_t x28 = (x26 + x16);
+ { uint64_t x29 = (x28 >> 0x1b);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
+ { uint64_t x31 = (x29 + x15);
+ { uint64_t x32 = (x31 >> 0x1b);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
+ { uint64_t x34 = (x32 + x14);
+ { uint64_t x35 = (x34 >> 0x1b);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+ { uint64_t x37 = (x35 + x13);
+ { uint64_t x38 = (x37 >> 0x1b);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+ { uint64_t x40 = (x21 + (0x19 * x38));
+ { uint32_t x41 = (uint32_t) (x40 >> 0x1b);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+ { uint32_t x43 = (x41 + x24);
+ { uint32_t x44 = (x43 >> 0x1b);
+ { uint32_t x45 = (x43 & 0x7ffffff);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = (x44 + x27);
+ out[3] = x30;
+ out[4] = x33;
+ out[5] = x36;
+ out[6] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e189m25/freeze.c b/src/Specific/solinas32_2e189m25/freeze.c
index 8314f828c..5abf7afb8 100644
--- a/src/Specific/solinas32_2e189m25/freeze.c
+++ b/src/Specific/solinas32_2e189m25/freeze.c
@@ -1,25 +1,39 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x14;
-out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffe7;;
+static void freeze(uint32_t out[7], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffe7);
+ { uint32_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x15, Return x4, 0x7ffffff);
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x18, Return x6, 0x7ffffff);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x8, 0x7ffffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x10, 0x7ffffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x12, 0x7ffffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x11, 0x7ffffff);
+ { uint32_t x34 = (uint32_t)cmovznz(x33, 0x0, 0xffffffff);
+ { uint32_t x35 = (x34 & 0x7ffffe7);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x14, Return x35);
+ { uint32_t x39 = (x34 & 0x7ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x17, Return x39);
+ { uint32_t x43 = (x34 & 0x7ffffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x20, Return x43);
+ { uint32_t x47 = (x34 & 0x7ffffff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x23, Return x47);
+ { uint32_t x51 = (x34 & 0x7ffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x26, Return x51);
+ { uint32_t x55 = (x34 & 0x7ffffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x29, Return x55);
+ { uint32_t x59 = (x34 & 0x7ffffff);
+ { uint32_t x61, uint8_t _ = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x32, Return x59);
+ out[0] = x37;
+ out[1] = x41;
+ out[2] = x45;
+ out[3] = x49;
+ out[4] = x53;
+ out[5] = x57;
+ out[6] = x61;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e190m11/femul.c b/src/Specific/solinas32_2e190m11/femul.c
index 8b6631bbf..ed25ed8f1 100644
--- a/src/Specific/solinas32_2e190m11/femul.c
+++ b/src/Specific/solinas32_2e190m11/femul.c
@@ -1,61 +1,57 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x28 = (((uint64_t)x5 * x26) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((0x2 * ((uint64_t)x13 * x21)) + ((0x2 * ((uint64_t)x15 * x19)) + ((uint64_t)x14 * x17)))))));
-{ uint64_t x29 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((0x2 * ((uint64_t)x13 * x19)) + ((uint64_t)x15 * x17)))))) + (0xb * ((uint64_t)x14 * x26)));
-{ uint64_t x30 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((0x2 * ((uint64_t)x11 * x19)) + ((uint64_t)x13 * x17))))) + (0xb * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
-{ uint64_t x31 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((0x2 * ((uint64_t)x9 * x19)) + ((uint64_t)x11 * x17)))) + (0xb * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
-{ uint64_t x32 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((uint64_t)x9 * x17))) + (0xb * (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
-{ uint64_t x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) + (0xb * (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
-{ ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0xb *ℤ ((0x2 * ((uint64_t)x7 * x26)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((0x2 * ((uint64_t)x15 * x21)) + (0x2 * ((uint64_t)x14 * x19)))))))));
-{ uint64_t x35 = (x34 >> 0x1c);
-{ uint32_t x36 = (x34 & 0xfffffff);
-{ uint64_t x37 = (x35 + x33);
-{ uint64_t x38 = (x37 >> 0x1b);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
-{ uint64_t x40 = (x38 + x32);
-{ uint64_t x41 = (x40 >> 0x1b);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint64_t x44 = (x43 >> 0x1b);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint64_t x47 = (x46 >> 0x1b);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint64_t x50 = (x49 >> 0x1b);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x1b);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
-{ uint64_t x55 = (x36 + (0xb * x53));
-{ uint32_t x56 = (uint32_t) (x55 >> 0x1c);
-{ uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
-{ uint32_t x58 = (x56 + x39);
-{ uint32_t x59 = (x58 >> 0x1b);
-{ uint32_t x60 = (x58 & 0x7ffffff);
-out[0] = x54;
-out[1] = x51;
-out[2] = x48;
-out[3] = x45;
-out[4] = x59 + x42;
-out[5] = x60;
-out[6] = x57;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void femul(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint64_t x28 = (((uint64_t)x5 * x26) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((0x2 * ((uint64_t)x13 * x21)) + ((0x2 * ((uint64_t)x15 * x19)) + ((uint64_t)x14 * x17)))))));
+ { uint64_t x29 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((0x2 * ((uint64_t)x13 * x19)) + ((uint64_t)x15 * x17)))))) + (0xb * ((uint64_t)x14 * x26)));
+ { uint64_t x30 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((0x2 * ((uint64_t)x11 * x19)) + ((uint64_t)x13 * x17))))) + (0xb * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
+ { uint64_t x31 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((0x2 * ((uint64_t)x9 * x19)) + ((uint64_t)x11 * x17)))) + (0xb * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
+ { uint64_t x32 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((uint64_t)x9 * x17))) + (0xb * (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
+ { uint64_t x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) + (0xb * (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
+ { ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0xb *ℤ ((0x2 * ((uint64_t)x7 * x26)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((0x2 * ((uint64_t)x15 * x21)) + (0x2 * ((uint64_t)x14 * x19)))))))));
+ { uint64_t x35 = (x34 >> 0x1c);
+ { uint32_t x36 = (x34 & 0xfffffff);
+ { uint64_t x37 = (x35 + x33);
+ { uint64_t x38 = (x37 >> 0x1b);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+ { uint64_t x40 = (x38 + x32);
+ { uint64_t x41 = (x40 >> 0x1b);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint64_t x44 = (x43 >> 0x1b);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint64_t x47 = (x46 >> 0x1b);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint64_t x50 = (x49 >> 0x1b);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x1b);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+ { uint64_t x55 = (x36 + (0xb * x53));
+ { uint32_t x56 = (uint32_t) (x55 >> 0x1c);
+ { uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
+ { uint32_t x58 = (x56 + x39);
+ { uint32_t x59 = (x58 >> 0x1b);
+ { uint32_t x60 = (x58 & 0x7ffffff);
+ out[0] = x57;
+ out[1] = x60;
+ out[2] = (x59 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e190m11/fesquare.c b/src/Specific/solinas32_2e190m11/fesquare.c
index 3974e00c4..ece920735 100644
--- a/src/Specific/solinas32_2e190m11/fesquare.c
+++ b/src/Specific/solinas32_2e190m11/fesquare.c
@@ -1,61 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (((uint64_t)x2 * x11) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x11 * x2)))))));
-{ uint64_t x14 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xb * ((uint64_t)x11 * x11)));
-{ uint64_t x15 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xb * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
-{ uint64_t x16 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xb * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
-{ uint64_t x17 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xb * (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xb * (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
-{ ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0xb *ℤ ((0x2 * ((uint64_t)x4 * x11)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + (0x2 * ((uint64_t)x11 * x4)))))))));
-{ uint64_t x20 = (x19 >> 0x1c);
-{ uint32_t x21 = (x19 & 0xfffffff);
-{ uint64_t x22 = (x20 + x18);
-{ uint64_t x23 = (x22 >> 0x1b);
-{ uint32_t x24 = ((uint32_t)x22 & 0x7ffffff);
-{ uint64_t x25 = (x23 + x17);
-{ uint64_t x26 = (x25 >> 0x1b);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
-{ uint64_t x28 = (x26 + x16);
-{ uint64_t x29 = (x28 >> 0x1b);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
-{ uint64_t x31 = (x29 + x15);
-{ uint64_t x32 = (x31 >> 0x1b);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
-{ uint64_t x34 = (x32 + x14);
-{ uint64_t x35 = (x34 >> 0x1b);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
-{ uint64_t x37 = (x35 + x13);
-{ uint64_t x38 = (x37 >> 0x1b);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
-{ uint64_t x40 = (x21 + (0xb * x38));
-{ uint32_t x41 = (uint32_t) (x40 >> 0x1c);
-{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
-{ uint32_t x43 = (x41 + x24);
-{ uint32_t x44 = (x43 >> 0x1b);
-{ uint32_t x45 = (x43 & 0x7ffffff);
-out[0] = x39;
-out[1] = x36;
-out[2] = x33;
-out[3] = x30;
-out[4] = x44 + x27;
-out[5] = x45;
-out[6] = x42;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void fesquare(uint32_t out[7], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x13 = (((uint64_t)x2 * x11) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x11 * x2)))))));
+ { uint64_t x14 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xb * ((uint64_t)x11 * x11)));
+ { uint64_t x15 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xb * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
+ { uint64_t x16 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xb * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
+ { uint64_t x17 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xb * (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
+ { uint64_t x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xb * (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
+ { ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0xb *ℤ ((0x2 * ((uint64_t)x4 * x11)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + (0x2 * ((uint64_t)x11 * x4)))))))));
+ { uint64_t x20 = (x19 >> 0x1c);
+ { uint32_t x21 = (x19 & 0xfffffff);
+ { uint64_t x22 = (x20 + x18);
+ { uint64_t x23 = (x22 >> 0x1b);
+ { uint32_t x24 = ((uint32_t)x22 & 0x7ffffff);
+ { uint64_t x25 = (x23 + x17);
+ { uint64_t x26 = (x25 >> 0x1b);
+ { uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
+ { uint64_t x28 = (x26 + x16);
+ { uint64_t x29 = (x28 >> 0x1b);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
+ { uint64_t x31 = (x29 + x15);
+ { uint64_t x32 = (x31 >> 0x1b);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
+ { uint64_t x34 = (x32 + x14);
+ { uint64_t x35 = (x34 >> 0x1b);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+ { uint64_t x37 = (x35 + x13);
+ { uint64_t x38 = (x37 >> 0x1b);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+ { uint64_t x40 = (x21 + (0xb * x38));
+ { uint32_t x41 = (uint32_t) (x40 >> 0x1c);
+ { uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+ { uint32_t x43 = (x41 + x24);
+ { uint32_t x44 = (x43 >> 0x1b);
+ { uint32_t x45 = (x43 & 0x7ffffff);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = (x44 + x27);
+ out[3] = x30;
+ out[4] = x33;
+ out[5] = x36;
+ out[6] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e190m11/freeze.c b/src/Specific/solinas32_2e190m11/freeze.c
index b82e016e2..61f77c31f 100644
--- a/src/Specific/solinas32_2e190m11/freeze.c
+++ b/src/Specific/solinas32_2e190m11/freeze.c
@@ -1,25 +1,39 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x14;
-out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff5;;
+static void freeze(uint32_t out[7], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff5);
+ { uint32_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x15, Return x4, 0x7ffffff);
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x18, Return x6, 0x7ffffff);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x8, 0x7ffffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x10, 0x7ffffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x12, 0x7ffffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x11, 0x7ffffff);
+ { uint32_t x34 = (uint32_t)cmovznz(x33, 0x0, 0xffffffff);
+ { uint32_t x35 = (x34 & 0xffffff5);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x14, Return x35);
+ { uint32_t x39 = (x34 & 0x7ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x17, Return x39);
+ { uint32_t x43 = (x34 & 0x7ffffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x20, Return x43);
+ { uint32_t x47 = (x34 & 0x7ffffff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x23, Return x47);
+ { uint32_t x51 = (x34 & 0x7ffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x26, Return x51);
+ { uint32_t x55 = (x34 & 0x7ffffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x29, Return x55);
+ { uint32_t x59 = (x34 & 0x7ffffff);
+ { uint32_t x61, uint8_t _ = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x32, Return x59);
+ out[0] = x37;
+ out[1] = x41;
+ out[2] = x45;
+ out[3] = x49;
+ out[4] = x53;
+ out[5] = x57;
+ out[6] = x61;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e191m19/femul.c b/src/Specific/solinas32_2e191m19/femul.c
index c4ef7a3cf..b5f6c1657 100644
--- a/src/Specific/solinas32_2e191m19/femul.c
+++ b/src/Specific/solinas32_2e191m19/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x13 * ((uint64_t)x20 * x38)));
-{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x13 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
-{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x13 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
-{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x13 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x13 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
-{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x13 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
-{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x13 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
-{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x13 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
-{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x13 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
-{ uint32_t x50 = (uint32_t) (x49 >> 0x14);
-{ uint32_t x51 = ((uint32_t)x49 & 0xfffff);
-{ uint64_t x52 = (x50 + x48);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x13);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
-{ uint64_t x55 = (x53 + x47);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x13);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
-{ uint64_t x58 = (x56 + x46);
-{ uint32_t x59 = (uint32_t) (x58 >> 0x13);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7ffff);
-{ uint64_t x61 = (x59 + x45);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x13);
-{ uint32_t x63 = ((uint32_t)x61 & 0x7ffff);
-{ uint64_t x64 = (x62 + x44);
-{ uint32_t x65 = (uint32_t) (x64 >> 0x13);
-{ uint32_t x66 = ((uint32_t)x64 & 0x7ffff);
-{ uint64_t x67 = (x65 + x43);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x13);
-{ uint32_t x69 = ((uint32_t)x67 & 0x7ffff);
-{ uint64_t x70 = (x68 + x42);
-{ uint32_t x71 = (uint32_t) (x70 >> 0x13);
-{ uint32_t x72 = ((uint32_t)x70 & 0x7ffff);
-{ uint64_t x73 = (x71 + x41);
-{ uint32_t x74 = (uint32_t) (x73 >> 0x13);
-{ uint32_t x75 = ((uint32_t)x73 & 0x7ffff);
-{ uint64_t x76 = (x74 + x40);
-{ uint32_t x77 = (uint32_t) (x76 >> 0x13);
-{ uint32_t x78 = ((uint32_t)x76 & 0x7ffff);
-{ uint32_t x79 = (x51 + (0x13 * x77));
-{ uint32_t x80 = (x79 >> 0x14);
-{ uint32_t x81 = (x79 & 0xfffff);
-{ uint32_t x82 = (x80 + x54);
-{ uint32_t x83 = (x82 >> 0x13);
-{ uint32_t x84 = (x82 & 0x7ffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9];
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x13 * ((uint64_t)x20 * x38)));
+ { uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x13 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+ { uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x13 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+ { uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x13 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x13 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+ { uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x13 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+ { uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x13 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+ { uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x13 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+ { uint64_t x49 = (((uint64_t)x5 * x23) + (0x13 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
+ { uint32_t x50 = (uint32_t) (x49 >> 0x14);
+ { uint32_t x51 = ((uint32_t)x49 & 0xfffff);
+ { uint64_t x52 = (x50 + x48);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x13);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
+ { uint64_t x55 = (x53 + x47);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x13);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
+ { uint64_t x58 = (x56 + x46);
+ { uint32_t x59 = (uint32_t) (x58 >> 0x13);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7ffff);
+ { uint64_t x61 = (x59 + x45);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x13);
+ { uint32_t x63 = ((uint32_t)x61 & 0x7ffff);
+ { uint64_t x64 = (x62 + x44);
+ { uint32_t x65 = (uint32_t) (x64 >> 0x13);
+ { uint32_t x66 = ((uint32_t)x64 & 0x7ffff);
+ { uint64_t x67 = (x65 + x43);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x13);
+ { uint32_t x69 = ((uint32_t)x67 & 0x7ffff);
+ { uint64_t x70 = (x68 + x42);
+ { uint32_t x71 = (uint32_t) (x70 >> 0x13);
+ { uint32_t x72 = ((uint32_t)x70 & 0x7ffff);
+ { uint64_t x73 = (x71 + x41);
+ { uint32_t x74 = (uint32_t) (x73 >> 0x13);
+ { uint32_t x75 = ((uint32_t)x73 & 0x7ffff);
+ { uint64_t x76 = (x74 + x40);
+ { uint32_t x77 = (uint32_t) (x76 >> 0x13);
+ { uint32_t x78 = ((uint32_t)x76 & 0x7ffff);
+ { uint32_t x79 = (x51 + (0x13 * x77));
+ { uint32_t x80 = (x79 >> 0x14);
+ { uint32_t x81 = (x79 & 0xfffff);
+ { uint32_t x82 = (x80 + x54);
+ { uint32_t x83 = (x82 >> 0x13);
+ { uint32_t x84 = (x82 & 0x7ffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e191m19/fesquare.c b/src/Specific/solinas32_2e191m19/fesquare.c
index a26b852d2..095228c4c 100644
--- a/src/Specific/solinas32_2e191m19/fesquare.c
+++ b/src/Specific/solinas32_2e191m19/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * ((uint64_t)x17 * x17)));
-{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
-{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
-{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
-{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
-{ uint32_t x29 = (uint32_t) (x28 >> 0x14);
-{ uint32_t x30 = ((uint32_t)x28 & 0xfffff);
-{ uint64_t x31 = (x29 + x27);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x13);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
-{ uint64_t x34 = (x32 + x26);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x13);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
-{ uint64_t x37 = (x35 + x25);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x13);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
-{ uint64_t x40 = (x38 + x24);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x13);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
-{ uint64_t x43 = (x41 + x23);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
-{ uint64_t x46 = (x44 + x22);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
-{ uint64_t x49 = (x47 + x21);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x13);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
-{ uint64_t x52 = (x50 + x20);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x13);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
-{ uint64_t x55 = (x53 + x19);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x13);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
-{ uint32_t x58 = (x30 + (0x13 * x56));
-{ uint32_t x59 = (x58 >> 0x14);
-{ uint32_t x60 = (x58 & 0xfffff);
-{ uint32_t x61 = (x59 + x33);
-{ uint32_t x62 = (x61 >> 0x13);
-{ uint32_t x63 = (x61 & 0x7ffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint32_t out[10], const uint32_t in1[10]) { /* Squares the 10-limb value in1 (every product below pairs two limbs of in1) and writes the carried 10-limb result to out; index 0 is the lowest limb. Presumably arithmetic modulo 2^191 - 19, per the directory name solinas32_2e191m19 -- confirm against the generator. */
+ { const uint32_t x17 = in1[9]; /* limbs are unpacked starting from the highest index */
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2)))))))))); /* x19..x28 are 64-bit column sums: x19 ends up feeding out[9], x28 feeds out[0] */
+ { uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * ((uint64_t)x17 * x17))); /* the 0x13 (19) factor scales products whose limb indices sum past the top limb -- presumably the wrap-around 2^191 = 19 (mod p), confirm */
+ { uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+ { uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+ { uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+ { uint64_t x28 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
+ { uint32_t x29 = (uint32_t) (x28 >> 0x14); /* carry chain starts: limb 0 keeps 20 bits (shift 0x14, mask 0xfffff) */
+ { uint32_t x30 = ((uint32_t)x28 & 0xfffff);
+ { uint64_t x31 = (x29 + x27);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x13); /* every later limb keeps 19 bits (shift 0x13, mask 0x7ffff) */
+ { uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
+ { uint64_t x34 = (x32 + x26);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x13);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
+ { uint64_t x37 = (x35 + x25);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x13);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
+ { uint64_t x40 = (x38 + x24);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x13);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
+ { uint64_t x43 = (x41 + x23);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x13);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
+ { uint64_t x46 = (x44 + x22);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x13);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+ { uint64_t x49 = (x47 + x21);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x13);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+ { uint64_t x52 = (x50 + x20);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x13);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
+ { uint64_t x55 = (x53 + x19);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x13);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
+ { uint32_t x58 = (x30 + (0x13 * x56)); /* carry out of the top limb is multiplied by 0x13 and added back into limb 0 */
+ { uint32_t x59 = (x58 >> 0x14); /* second, partial carry pass covering limbs 0 and 1 only */
+ { uint32_t x60 = (x58 & 0xfffff);
+ { uint32_t x61 = (x59 + x33);
+ { uint32_t x62 = (x61 >> 0x13);
+ { uint32_t x63 = (x61 & 0x7ffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36); /* the last carry is added without masking, so out[2] is not re-reduced to 19 bits here */
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e191m19/freeze.c b/src/Specific/solinas32_2e191m19/freeze.c
index d015ecc2b..440e3f628 100644
--- a/src/Specific/solinas32_2e191m19/freeze.c
+++ b/src/Specific/solinas32_2e191m19/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 20 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffed;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) { /* Subtracts the per-limb constants 0xfffed (limb 0) and 0x7ffff (limbs 1..9) from in1 with a borrow chain, then adds those constants back under a mask derived from the final borrow, writing the 10 limbs to out. NOTE(review): the "Op (Syntax....)" lines below are unrendered generator notation, not C, so this function does not compile as written. */
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xfffed); /* limb 0: in1[0] minus 0xfffed with borrow-in 0; the leading 20 is presumably the limb width in bits -- confirm */
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0x7ffff); /* limbs 1..9: subtract 0x7ffff, each step taking the previous borrow */
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x7ffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0x7ffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x7ffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0x7ffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x7ffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0x7ffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x7ffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0x7ffff);
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff); /* mask built from the final borrow x48; cmovznz is defined outside this file -- presumably yields 0xffffffff when x48 is non-zero, confirm */
+ { uint32_t x50 = (x49 & 0xfffed); /* masked copy of the limb-0 constant */
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50); /* add-back pass: limb 0, carry-in 0 */
+ { uint32_t x54 = (x49 & 0x7ffff); /* masked copy of the constant used for limbs 1..9 (recomputed for each limb below) */
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x7ffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0x7ffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x7ffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0x7ffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x7ffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0x7ffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x7ffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0x7ffff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86); /* the carry out of the top limb is bound to _ and never used */
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e192m2e64m1/femul.c b/src/Specific/solinas32_2e192m2e64m1/femul.c
index b05266d7c..766b7a024 100644
--- a/src/Specific/solinas32_2e192m2e64m1/femul.c
+++ b/src/Specific/solinas32_2e192m2e64m1/femul.c
@@ -1,77 +1,75 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ ℤ x32 = ((((uint64_t)x5 * x30) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + (((uint64_t)x17 * x21) + ((uint64_t)x16 * x19)))))))) +ℤ ((0x10000 *ℤ (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))) +ℤ (0x10000000000 *ℤ ((uint64_t)x16 * x30))));
-{ ℤ x33 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x17 * x19))))))) +ℤ (((uint64_t)x16 * x30) +ℤ (0x10000 *ℤ (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29))))));
-{ ℤ x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) +ℤ ((((uint64_t)x17 * x30) + ((uint64_t)x16 * x31)) +ℤ (0x10000 *ℤ (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))))));
-{ ℤ x35 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + ((uint64_t)x13 * x19))))) +ℤ ((((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29))) +ℤ (0x10000 *ℤ (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25))))))));
-{ ℤ x36 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) +ℤ ((((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))) +ℤ (0x10000 *ℤ (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23)))))))));
-{ ℤ x37 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))) +ℤ ((((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25))))) +ℤ (0x10000 *ℤ (((uint64_t)x7 * x30) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x16 * x21))))))))));
-{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23)))))));
-{ uint64_t x39 = (((uint64_t)x5 * x19) + (((uint64_t)x7 * x30) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x16 * x21))))))));
-{ uint32_t x40 = (uint32_t) (x38 >> 0x18);
-{ uint32_t x41 = ((uint32_t)x38 & 0xffffff);
-{ ℤ x42 = (x32 >>ℤ 0x18);
-{ uint32_t x43 = (x32 & 0xffffff);
-{ ℤ x44 = ((0x1000000 *ℤ x42) +ℤ x43);
-{ ℤ x45 = (x44 >>ℤ 0x18);
-{ uint32_t x46 = (x44 & 0xffffff);
-{ ℤ x47 = ((x40 +ℤ x37) +ℤ (0x10000 *ℤ x45));
-{ uint64_t x48 = (x47 >> 0x18);
-{ uint32_t x49 = (x47 & 0xffffff);
-{ ℤ x50 = (x39 +ℤ x45);
-{ uint64_t x51 = (x50 >> 0x18);
-{ uint32_t x52 = (x50 & 0xffffff);
-{ ℤ x53 = (x48 +ℤ x36);
-{ uint64_t x54 = (x53 >> 0x18);
-{ uint32_t x55 = (x53 & 0xffffff);
-{ uint64_t x56 = (x51 + x41);
-{ uint32_t x57 = (uint32_t) (x56 >> 0x18);
-{ uint32_t x58 = ((uint32_t)x56 & 0xffffff);
-{ ℤ x59 = (x54 +ℤ x35);
-{ uint64_t x60 = (x59 >> 0x18);
-{ uint32_t x61 = (x59 & 0xffffff);
-{ ℤ x62 = (x60 +ℤ x34);
-{ uint64_t x63 = (x62 >> 0x18);
-{ uint32_t x64 = (x62 & 0xffffff);
-{ ℤ x65 = (x63 +ℤ x33);
-{ uint64_t x66 = (x65 >> 0x18);
-{ uint32_t x67 = (x65 & 0xffffff);
-{ uint64_t x68 = (x66 + x46);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x18);
-{ uint32_t x70 = ((uint32_t)x68 & 0xffffff);
-{ uint64_t x71 = (((uint64_t)0x1000000 * x69) + x70);
-{ uint32_t x72 = (uint32_t) (x71 >> 0x18);
-{ uint32_t x73 = ((uint32_t)x71 & 0xffffff);
-{ uint64_t x74 = ((x57 + x49) + ((uint64_t)0x10000 * x72));
-{ uint32_t x75 = (uint32_t) (x74 >> 0x18);
-{ uint32_t x76 = ((uint32_t)x74 & 0xffffff);
-{ uint32_t x77 = (x52 + x72);
-{ uint32_t x78 = (x77 >> 0x18);
-{ uint32_t x79 = (x77 & 0xffffff);
-out[0] = x73;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x75 + x55;
-out[5] = x76;
-out[6] = x78 + x58;
-out[7] = x79;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) { /* Multiplies the 8-limb values in1 and in2 (each product below pairs one limb of in1 with one limb of in2) and writes an 8-limb result to out, carrying in 24-bit steps (shift 0x18, mask 0xffffff). Presumably arithmetic modulo 2^192 - 2^64 - 1, per the directory name solinas32_2e192m2e64m1 -- confirm. NOTE(review): the ℤ-typed declarations and ℤ-suffixed operators below are generator notation, not C, so this function does not compile as written. */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { ℤ x32 = ((((uint64_t)x5 * x30) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + (((uint64_t)x17 * x21) + ((uint64_t)x16 * x19)))))))) +ℤ ((0x10000 *ℤ (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))) +ℤ (0x10000000000 *ℤ ((uint64_t)x16 * x30)))); /* x32..x39: column sums of limb products; the ℤ type presumably marks values the generator did not assign a machine-word type -- confirm */
+ { ℤ x33 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x17 * x19))))))) +ℤ (((uint64_t)x16 * x30) +ℤ (0x10000 *ℤ (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29))))));
+ { ℤ x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) +ℤ ((((uint64_t)x17 * x30) + ((uint64_t)x16 * x31)) +ℤ (0x10000 *ℤ (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))))));
+ { ℤ x35 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + ((uint64_t)x13 * x19))))) +ℤ ((((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29))) +ℤ (0x10000 *ℤ (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25))))))));
+ { ℤ x36 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) +ℤ ((((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))) +ℤ (0x10000 *ℤ (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23)))))))));
+ { ℤ x37 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))) +ℤ ((((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25))))) +ℤ (0x10000 *ℤ (((uint64_t)x7 * x30) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x16 * x21))))))))));
+ { uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23)))))));
+ { uint64_t x39 = (((uint64_t)x5 * x19) + (((uint64_t)x7 * x30) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x16 * x21))))))));
+ { uint32_t x40 = (uint32_t) (x38 >> 0x18); /* from here on each sum is split into a carry (>> 0x18) and a 24-bit limb (& 0xffffff) */
+ { uint32_t x41 = ((uint32_t)x38 & 0xffffff);
+ { ℤ x42 = (x32 >>ℤ 0x18);
+ { uint32_t x43 = (x32 & 0xffffff);
+ { ℤ x44 = ((0x1000000 *ℤ x42) +ℤ x43);
+ { ℤ x45 = (x44 >>ℤ 0x18);
+ { uint32_t x46 = (x44 & 0xffffff);
+ { ℤ x47 = ((x40 +ℤ x37) +ℤ (0x10000 *ℤ x45));
+ { uint64_t x48 = (x47 >> 0x18);
+ { uint32_t x49 = (x47 & 0xffffff);
+ { ℤ x50 = (x39 +ℤ x45);
+ { uint64_t x51 = (x50 >> 0x18);
+ { uint32_t x52 = (x50 & 0xffffff);
+ { ℤ x53 = (x48 +ℤ x36);
+ { uint64_t x54 = (x53 >> 0x18);
+ { uint32_t x55 = (x53 & 0xffffff);
+ { uint64_t x56 = (x51 + x41);
+ { uint32_t x57 = (uint32_t) (x56 >> 0x18);
+ { uint32_t x58 = ((uint32_t)x56 & 0xffffff);
+ { ℤ x59 = (x54 +ℤ x35);
+ { uint64_t x60 = (x59 >> 0x18);
+ { uint32_t x61 = (x59 & 0xffffff);
+ { ℤ x62 = (x60 +ℤ x34);
+ { uint64_t x63 = (x62 >> 0x18);
+ { uint32_t x64 = (x62 & 0xffffff);
+ { ℤ x65 = (x63 +ℤ x33);
+ { uint64_t x66 = (x65 >> 0x18);
+ { uint32_t x67 = (x65 & 0xffffff);
+ { uint64_t x68 = (x66 + x46);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x18);
+ { uint32_t x70 = ((uint32_t)x68 & 0xffffff);
+ { uint64_t x71 = (((uint64_t)0x1000000 * x69) + x70);
+ { uint32_t x72 = (uint32_t) (x71 >> 0x18);
+ { uint32_t x73 = ((uint32_t)x71 & 0xffffff);
+ { uint64_t x74 = ((x57 + x49) + ((uint64_t)0x10000 * x72));
+ { uint32_t x75 = (uint32_t) (x74 >> 0x18);
+ { uint32_t x76 = ((uint32_t)x74 & 0xffffff);
+ { uint32_t x77 = (x52 + x72);
+ { uint32_t x78 = (x77 >> 0x18);
+ { uint32_t x79 = (x77 & 0xffffff);
+ out[0] = x79;
+ out[1] = (x78 + x58); /* carry plus limb, stored without a further mask */
+ out[2] = x76;
+ out[3] = (x75 + x55); /* carry plus limb, stored without a further mask */
+ out[4] = x61;
+ out[5] = x64;
+ out[6] = x67;
+ out[7] = x73;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e192m2e64m1/fesquare.c b/src/Specific/solinas32_2e192m2e64m1/fesquare.c
index 9beed8725..7d8009ef3 100644
--- a/src/Specific/solinas32_2e192m2e64m1/fesquare.c
+++ b/src/Specific/solinas32_2e192m2e64m1/fesquare.c
@@ -1,77 +1,67 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ ℤ x15 = ((((uint64_t)x2 * x13) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x13 * x2)))))))) +ℤ ((0x10000 *ℤ (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))) +ℤ (0x10000000000 *ℤ ((uint64_t)x13 * x13))));
-{ ℤ x16 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x13 * x13) +ℤ (0x10000 *ℤ (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12))))));
-{ ℤ x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ ((((uint64_t)x14 * x13) + ((uint64_t)x13 * x14)) +ℤ (0x10000 *ℤ (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))))));
-{ ℤ x18 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ ((((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12))) +ℤ (0x10000 *ℤ (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8))))))));
-{ ℤ x19 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ ((((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))) +ℤ (0x10000 *ℤ (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6)))))))));
-{ ℤ x20 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ ((((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8))))) +ℤ (0x10000 *ℤ (((uint64_t)x4 * x13) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x13 * x4))))))))));
-{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6)))))));
-{ uint64_t x22 = (((uint64_t)x2 * x2) + (((uint64_t)x4 * x13) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x13 * x4))))))));
-{ uint32_t x23 = (uint32_t) (x21 >> 0x18);
-{ uint32_t x24 = ((uint32_t)x21 & 0xffffff);
-{ ℤ x25 = (x15 >>ℤ 0x18);
-{ uint32_t x26 = (x15 & 0xffffff);
-{ ℤ x27 = ((0x1000000 *ℤ x25) +ℤ x26);
-{ ℤ x28 = (x27 >>ℤ 0x18);
-{ uint32_t x29 = (x27 & 0xffffff);
-{ ℤ x30 = ((x23 +ℤ x20) +ℤ (0x10000 *ℤ x28));
-{ uint64_t x31 = (x30 >> 0x18);
-{ uint32_t x32 = (x30 & 0xffffff);
-{ ℤ x33 = (x22 +ℤ x28);
-{ uint64_t x34 = (x33 >> 0x18);
-{ uint32_t x35 = (x33 & 0xffffff);
-{ ℤ x36 = (x31 +ℤ x19);
-{ uint64_t x37 = (x36 >> 0x18);
-{ uint32_t x38 = (x36 & 0xffffff);
-{ uint64_t x39 = (x34 + x24);
-{ uint32_t x40 = (uint32_t) (x39 >> 0x18);
-{ uint32_t x41 = ((uint32_t)x39 & 0xffffff);
-{ ℤ x42 = (x37 +ℤ x18);
-{ uint64_t x43 = (x42 >> 0x18);
-{ uint32_t x44 = (x42 & 0xffffff);
-{ ℤ x45 = (x43 +ℤ x17);
-{ uint64_t x46 = (x45 >> 0x18);
-{ uint32_t x47 = (x45 & 0xffffff);
-{ ℤ x48 = (x46 +ℤ x16);
-{ uint64_t x49 = (x48 >> 0x18);
-{ uint32_t x50 = (x48 & 0xffffff);
-{ uint64_t x51 = (x49 + x29);
-{ uint32_t x52 = (uint32_t) (x51 >> 0x18);
-{ uint32_t x53 = ((uint32_t)x51 & 0xffffff);
-{ uint64_t x54 = (((uint64_t)0x1000000 * x52) + x53);
-{ uint32_t x55 = (uint32_t) (x54 >> 0x18);
-{ uint32_t x56 = ((uint32_t)x54 & 0xffffff);
-{ uint64_t x57 = ((x40 + x32) + ((uint64_t)0x10000 * x55));
-{ uint32_t x58 = (uint32_t) (x57 >> 0x18);
-{ uint32_t x59 = ((uint32_t)x57 & 0xffffff);
-{ uint32_t x60 = (x35 + x55);
-{ uint32_t x61 = (x60 >> 0x18);
-{ uint32_t x62 = (x60 & 0xffffff);
-out[0] = x56;
-out[1] = x50;
-out[2] = x47;
-out[3] = x44;
-out[4] = x58 + x38;
-out[5] = x59;
-out[6] = x61 + x41;
-out[7] = x62;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { ℤ x15 = ((((uint64_t)x2 * x13) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x13 * x2)))))))) +ℤ ((0x10000 *ℤ (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))) +ℤ (0x10000000000 *ℤ ((uint64_t)x13 * x13))));
+ { ℤ x16 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x13 * x13) +ℤ (0x10000 *ℤ (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12))))));
+ { ℤ x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ ((((uint64_t)x14 * x13) + ((uint64_t)x13 * x14)) +ℤ (0x10000 *ℤ (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))))));
+ { ℤ x18 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ ((((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12))) +ℤ (0x10000 *ℤ (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8))))))));
+ { ℤ x19 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ ((((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))) +ℤ (0x10000 *ℤ (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6)))))))));
+ { ℤ x20 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ ((((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8))))) +ℤ (0x10000 *ℤ (((uint64_t)x4 * x13) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x13 * x4))))))))));
+ { uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6)))))));
+ { uint64_t x22 = (((uint64_t)x2 * x2) + (((uint64_t)x4 * x13) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x13 * x4))))))));
+ { uint32_t x23 = (uint32_t) (x21 >> 0x18);
+ { uint32_t x24 = ((uint32_t)x21 & 0xffffff);
+ { ℤ x25 = (x15 >>ℤ 0x18);
+ { uint32_t x26 = (x15 & 0xffffff);
+ { ℤ x27 = ((0x1000000 *ℤ x25) +ℤ x26);
+ { ℤ x28 = (x27 >>ℤ 0x18);
+ { uint32_t x29 = (x27 & 0xffffff);
+ { ℤ x30 = ((x23 +ℤ x20) +ℤ (0x10000 *ℤ x28));
+ { uint64_t x31 = (x30 >> 0x18);
+ { uint32_t x32 = (x30 & 0xffffff);
+ { ℤ x33 = (x22 +ℤ x28);
+ { uint64_t x34 = (x33 >> 0x18);
+ { uint32_t x35 = (x33 & 0xffffff);
+ { ℤ x36 = (x31 +ℤ x19);
+ { uint64_t x37 = (x36 >> 0x18);
+ { uint32_t x38 = (x36 & 0xffffff);
+ { uint64_t x39 = (x34 + x24);
+ { uint32_t x40 = (uint32_t) (x39 >> 0x18);
+ { uint32_t x41 = ((uint32_t)x39 & 0xffffff);
+ { ℤ x42 = (x37 +ℤ x18);
+ { uint64_t x43 = (x42 >> 0x18);
+ { uint32_t x44 = (x42 & 0xffffff);
+ { ℤ x45 = (x43 +ℤ x17);
+ { uint64_t x46 = (x45 >> 0x18);
+ { uint32_t x47 = (x45 & 0xffffff);
+ { ℤ x48 = (x46 +ℤ x16);
+ { uint64_t x49 = (x48 >> 0x18);
+ { uint32_t x50 = (x48 & 0xffffff);
+ { uint64_t x51 = (x49 + x29);
+ { uint32_t x52 = (uint32_t) (x51 >> 0x18);
+ { uint32_t x53 = ((uint32_t)x51 & 0xffffff);
+ { uint64_t x54 = (((uint64_t)0x1000000 * x52) + x53);
+ { uint32_t x55 = (uint32_t) (x54 >> 0x18);
+ { uint32_t x56 = ((uint32_t)x54 & 0xffffff);
+ { uint64_t x57 = ((x40 + x32) + ((uint64_t)0x10000 * x55));
+ { uint32_t x58 = (uint32_t) (x57 >> 0x18);
+ { uint32_t x59 = ((uint32_t)x57 & 0xffffff);
+ { uint32_t x60 = (x35 + x55);
+ { uint32_t x61 = (x60 >> 0x18);
+ { uint32_t x62 = (x60 & 0xffffff);
+ out[0] = x62;
+ out[1] = (x61 + x41);
+ out[2] = x59;
+ out[3] = (x58 + x38);
+ out[4] = x44;
+ out[5] = x47;
+ out[6] = x50;
+ out[7] = x56;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e192m2e64m1/freeze.c b/src/Specific/solinas32_2e192m2e64m1/freeze.c
index ac3559f88..a2c6fa5b6 100644
--- a/src/Specific/solinas32_2e192m2e64m1/freeze.c
+++ b/src/Specific/solinas32_2e192m2e64m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x6, 0xfeffff);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffff);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff);
+ { uint32_t x40 = (x39 & 0xffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x44 = (x39 & 0xffffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint32_t x48 = (x39 & 0xfeffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint32_t x52 = (x39 & 0xffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint32_t x56 = (x39 & 0xffffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint32_t x60 = (x39 & 0xffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint32_t x64 = (x39 & 0xffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint32_t x68 = (x39 & 0xffffff);
+ { uint32_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e194m33/femul.c b/src/Specific/solinas32_2e194m33/femul.c
index c12303cb3..59c62a65b 100644
--- a/src/Specific/solinas32_2e194m33/femul.c
+++ b/src/Specific/solinas32_2e194m33/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19))))))));
-{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x21 * ((uint64_t)x16 * x30)));
-{ uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x21 * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
-{ uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0x21 * ((0x2 * ((uint64_t)x15 * x30)) + ((0x2 * ((uint64_t)x17 * x31)) + (0x2 * ((uint64_t)x16 * x29))))));
-{ uint64_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + (0x21 * (((uint64_t)x13 * x30) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x16 * x27))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + (0x21 * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
-{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0x21 * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
-{ uint64_t x39 = (((uint64_t)x5 * x19) + (0x21 * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
-{ uint64_t x40 = (x39 >> 0x19);
-{ uint32_t x41 = ((uint32_t)x39 & 0x1ffffff);
-{ uint64_t x42 = (x40 + x38);
-{ uint64_t x43 = (x42 >> 0x18);
-{ uint32_t x44 = ((uint32_t)x42 & 0xffffff);
-{ uint64_t x45 = (x43 + x37);
-{ uint64_t x46 = (x45 >> 0x18);
-{ uint32_t x47 = ((uint32_t)x45 & 0xffffff);
-{ uint64_t x48 = (x46 + x36);
-{ uint64_t x49 = (x48 >> 0x18);
-{ uint32_t x50 = ((uint32_t)x48 & 0xffffff);
-{ uint64_t x51 = (x49 + x35);
-{ uint64_t x52 = (x51 >> 0x19);
-{ uint32_t x53 = ((uint32_t)x51 & 0x1ffffff);
-{ uint64_t x54 = (x52 + x34);
-{ uint64_t x55 = (x54 >> 0x18);
-{ uint32_t x56 = ((uint32_t)x54 & 0xffffff);
-{ uint64_t x57 = (x55 + x33);
-{ uint64_t x58 = (x57 >> 0x18);
-{ uint32_t x59 = ((uint32_t)x57 & 0xffffff);
-{ uint64_t x60 = (x58 + x32);
-{ uint32_t x61 = (uint32_t) (x60 >> 0x18);
-{ uint32_t x62 = ((uint32_t)x60 & 0xffffff);
-{ uint64_t x63 = (x41 + ((uint64_t)0x21 * x61));
-{ uint32_t x64 = (uint32_t) (x63 >> 0x19);
-{ uint32_t x65 = ((uint32_t)x63 & 0x1ffffff);
-{ uint32_t x66 = (x64 + x44);
-{ uint32_t x67 = (x66 >> 0x18);
-{ uint32_t x68 = (x66 & 0xffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19))))))));
+ { uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x21 * ((uint64_t)x16 * x30)));
+ { uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x21 * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+ { uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0x21 * ((0x2 * ((uint64_t)x15 * x30)) + ((0x2 * ((uint64_t)x17 * x31)) + (0x2 * ((uint64_t)x16 * x29))))));
+ { uint64_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + (0x21 * (((uint64_t)x13 * x30) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x16 * x27))))));
+ { uint64_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + (0x21 * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
+ { uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0x21 * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+ { uint64_t x39 = (((uint64_t)x5 * x19) + (0x21 * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
+ { uint64_t x40 = (x39 >> 0x19);
+ { uint32_t x41 = ((uint32_t)x39 & 0x1ffffff);
+ { uint64_t x42 = (x40 + x38);
+ { uint64_t x43 = (x42 >> 0x18);
+ { uint32_t x44 = ((uint32_t)x42 & 0xffffff);
+ { uint64_t x45 = (x43 + x37);
+ { uint64_t x46 = (x45 >> 0x18);
+ { uint32_t x47 = ((uint32_t)x45 & 0xffffff);
+ { uint64_t x48 = (x46 + x36);
+ { uint64_t x49 = (x48 >> 0x18);
+ { uint32_t x50 = ((uint32_t)x48 & 0xffffff);
+ { uint64_t x51 = (x49 + x35);
+ { uint64_t x52 = (x51 >> 0x19);
+ { uint32_t x53 = ((uint32_t)x51 & 0x1ffffff);
+ { uint64_t x54 = (x52 + x34);
+ { uint64_t x55 = (x54 >> 0x18);
+ { uint32_t x56 = ((uint32_t)x54 & 0xffffff);
+ { uint64_t x57 = (x55 + x33);
+ { uint64_t x58 = (x57 >> 0x18);
+ { uint32_t x59 = ((uint32_t)x57 & 0xffffff);
+ { uint64_t x60 = (x58 + x32);
+ { uint32_t x61 = (uint32_t) (x60 >> 0x18);
+ { uint32_t x62 = ((uint32_t)x60 & 0xffffff);
+ { uint64_t x63 = (x41 + ((uint64_t)0x21 * x61));
+ { uint32_t x64 = (uint32_t) (x63 >> 0x19);
+ { uint32_t x65 = ((uint32_t)x63 & 0x1ffffff);
+ { uint32_t x66 = (x64 + x44);
+ { uint32_t x67 = (x66 >> 0x18);
+ { uint32_t x68 = (x66 & 0xffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e194m33/fesquare.c b/src/Specific/solinas32_2e194m33/fesquare.c
index 0a48a9893..c5d941438 100644
--- a/src/Specific/solinas32_2e194m33/fesquare.c
+++ b/src/Specific/solinas32_2e194m33/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2))))))));
-{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x21 * ((uint64_t)x13 * x13)));
-{ uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x21 * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
-{ uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x21 * ((0x2 * ((uint64_t)x12 * x13)) + ((0x2 * ((uint64_t)x14 * x14)) + (0x2 * ((uint64_t)x13 * x12))))));
-{ uint64_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x21 * (((uint64_t)x10 * x13) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((uint64_t)x13 * x10))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x21 * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
-{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x21 * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
-{ uint64_t x22 = (((uint64_t)x2 * x2) + (0x21 * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
-{ uint64_t x23 = (x22 >> 0x19);
-{ uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
-{ uint64_t x25 = (x23 + x21);
-{ uint64_t x26 = (x25 >> 0x18);
-{ uint32_t x27 = ((uint32_t)x25 & 0xffffff);
-{ uint64_t x28 = (x26 + x20);
-{ uint64_t x29 = (x28 >> 0x18);
-{ uint32_t x30 = ((uint32_t)x28 & 0xffffff);
-{ uint64_t x31 = (x29 + x19);
-{ uint64_t x32 = (x31 >> 0x18);
-{ uint32_t x33 = ((uint32_t)x31 & 0xffffff);
-{ uint64_t x34 = (x32 + x18);
-{ uint64_t x35 = (x34 >> 0x19);
-{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
-{ uint64_t x37 = (x35 + x17);
-{ uint64_t x38 = (x37 >> 0x18);
-{ uint32_t x39 = ((uint32_t)x37 & 0xffffff);
-{ uint64_t x40 = (x38 + x16);
-{ uint64_t x41 = (x40 >> 0x18);
-{ uint32_t x42 = ((uint32_t)x40 & 0xffffff);
-{ uint64_t x43 = (x41 + x15);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x18);
-{ uint32_t x45 = ((uint32_t)x43 & 0xffffff);
-{ uint64_t x46 = (x24 + ((uint64_t)0x21 * x44));
-{ uint32_t x47 = (uint32_t) (x46 >> 0x19);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
-{ uint32_t x49 = (x47 + x27);
-{ uint32_t x50 = (x49 >> 0x18);
-{ uint32_t x51 = (x49 & 0xffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2))))))));
+ { uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x21 * ((uint64_t)x13 * x13)));
+ { uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x21 * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+ { uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x21 * ((0x2 * ((uint64_t)x12 * x13)) + ((0x2 * ((uint64_t)x14 * x14)) + (0x2 * ((uint64_t)x13 * x12))))));
+ { uint64_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x21 * (((uint64_t)x10 * x13) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((uint64_t)x13 * x10))))));
+ { uint64_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x21 * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
+ { uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x21 * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+ { uint64_t x22 = (((uint64_t)x2 * x2) + (0x21 * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
+ { uint64_t x23 = (x22 >> 0x19);
+ { uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
+ { uint64_t x25 = (x23 + x21);
+ { uint64_t x26 = (x25 >> 0x18);
+ { uint32_t x27 = ((uint32_t)x25 & 0xffffff);
+ { uint64_t x28 = (x26 + x20);
+ { uint64_t x29 = (x28 >> 0x18);
+ { uint32_t x30 = ((uint32_t)x28 & 0xffffff);
+ { uint64_t x31 = (x29 + x19);
+ { uint64_t x32 = (x31 >> 0x18);
+ { uint32_t x33 = ((uint32_t)x31 & 0xffffff);
+ { uint64_t x34 = (x32 + x18);
+ { uint64_t x35 = (x34 >> 0x19);
+ { uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
+ { uint64_t x37 = (x35 + x17);
+ { uint64_t x38 = (x37 >> 0x18);
+ { uint32_t x39 = ((uint32_t)x37 & 0xffffff);
+ { uint64_t x40 = (x38 + x16);
+ { uint64_t x41 = (x40 >> 0x18);
+ { uint32_t x42 = ((uint32_t)x40 & 0xffffff);
+ { uint64_t x43 = (x41 + x15);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x18);
+ { uint32_t x45 = ((uint32_t)x43 & 0xffffff);
+ { uint64_t x46 = (x24 + ((uint64_t)0x21 * x44));
+ { uint32_t x47 = (uint32_t) (x46 >> 0x19);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
+ { uint32_t x49 = (x47 + x27);
+ { uint32_t x50 = (x49 >> 0x18);
+ { uint32_t x51 = (x49 & 0xffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e194m33/freeze.c b/src/Specific/solinas32_2e194m33/freeze.c
index 9a73df9dc..9dd22f931 100644
--- a/src/Specific/solinas32_2e194m33/freeze.c
+++ b/src/Specific/solinas32_2e194m33/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 25 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffdf;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffdf);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffff);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffff);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0x1ffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff);
+ { uint32_t x40 = (x39 & 0x1ffffdf);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x44 = (x39 & 0xffffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint32_t x48 = (x39 & 0xffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint32_t x52 = (x39 & 0xffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint32_t x56 = (x39 & 0x1ffffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint32_t x60 = (x39 & 0xffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint32_t x64 = (x39 & 0xffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint32_t x68 = (x39 & 0xffffff);
+ { uint32_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e196m15/femul.c b/src/Specific/solinas32_2e196m15/femul.c
index a3ff58741..7490e55da 100644
--- a/src/Specific/solinas32_2e196m15/femul.c
+++ b/src/Specific/solinas32_2e196m15/femul.c
@@ -1,61 +1,57 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint64_t x28 = (((uint64_t)x5 * x26) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + (((uint64_t)x15 * x19) + ((uint64_t)x14 * x17)))))));
-{ uint64_t x29 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x15 * x17)))))) + (0xf * ((uint64_t)x14 * x26)));
-{ ℤ x30 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((uint64_t)x13 * x17))))) +ℤ (0xf *ℤ (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
-{ ℤ x31 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + ((uint64_t)x11 * x17)))) +ℤ (0xf *ℤ (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
-{ ℤ x32 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + ((uint64_t)x9 * x17))) +ℤ (0xf *ℤ (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
-{ ℤ x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) +ℤ (0xf *ℤ (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
-{ ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0xf *ℤ (((uint64_t)x7 * x26) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x14 * x19))))))));
-{ uint64_t x35 = (x34 >> 0x1c);
-{ uint32_t x36 = (x34 & 0xfffffff);
-{ ℤ x37 = (x35 +ℤ x33);
-{ uint64_t x38 = (x37 >> 0x1c);
-{ uint32_t x39 = (x37 & 0xfffffff);
-{ ℤ x40 = (x38 +ℤ x32);
-{ uint64_t x41 = (x40 >> 0x1c);
-{ uint32_t x42 = (x40 & 0xfffffff);
-{ ℤ x43 = (x41 +ℤ x31);
-{ uint64_t x44 = (x43 >> 0x1c);
-{ uint32_t x45 = (x43 & 0xfffffff);
-{ ℤ x46 = (x44 +ℤ x30);
-{ uint64_t x47 = (x46 >> 0x1c);
-{ uint32_t x48 = (x46 & 0xfffffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint64_t x50 = (x49 >> 0x1c);
-{ uint32_t x51 = ((uint32_t)x49 & 0xfffffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x1c);
-{ uint32_t x54 = ((uint32_t)x52 & 0xfffffff);
-{ uint64_t x55 = (x36 + (0xf * x53));
-{ uint32_t x56 = (uint32_t) (x55 >> 0x1c);
-{ uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
-{ uint32_t x58 = (x56 + x39);
-{ uint32_t x59 = (x58 >> 0x1c);
-{ uint32_t x60 = (x58 & 0xfffffff);
-out[0] = x54;
-out[1] = x51;
-out[2] = x48;
-out[3] = x45;
-out[4] = x59 + x42;
-out[5] = x60;
-out[6] = x57;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void femul(uint32_t out[7], const uint32_t in1[7], const uint32_t in2[7]) {
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x26 = in2[6];
+ { const uint32_t x27 = in2[5];
+ { const uint32_t x25 = in2[4];
+ { const uint32_t x23 = in2[3];
+ { const uint32_t x21 = in2[2];
+ { const uint32_t x19 = in2[1];
+ { const uint32_t x17 = in2[0];
+ { uint64_t x28 = (((uint64_t)x5 * x26) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + (((uint64_t)x15 * x19) + ((uint64_t)x14 * x17)))))));
+ { uint64_t x29 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x15 * x17)))))) + (0xf * ((uint64_t)x14 * x26)));
+ { ℤ x30 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((uint64_t)x13 * x17))))) +ℤ (0xf *ℤ (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
+ { ℤ x31 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + ((uint64_t)x11 * x17)))) +ℤ (0xf *ℤ (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
+ { ℤ x32 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + ((uint64_t)x9 * x17))) +ℤ (0xf *ℤ (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
+ { ℤ x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) +ℤ (0xf *ℤ (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
+ { ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0xf *ℤ (((uint64_t)x7 * x26) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x14 * x19))))))));
+ { uint64_t x35 = (x34 >> 0x1c);
+ { uint32_t x36 = (x34 & 0xfffffff);
+ { ℤ x37 = (x35 +ℤ x33);
+ { uint64_t x38 = (x37 >> 0x1c);
+ { uint32_t x39 = (x37 & 0xfffffff);
+ { ℤ x40 = (x38 +ℤ x32);
+ { uint64_t x41 = (x40 >> 0x1c);
+ { uint32_t x42 = (x40 & 0xfffffff);
+ { ℤ x43 = (x41 +ℤ x31);
+ { uint64_t x44 = (x43 >> 0x1c);
+ { uint32_t x45 = (x43 & 0xfffffff);
+ { ℤ x46 = (x44 +ℤ x30);
+ { uint64_t x47 = (x46 >> 0x1c);
+ { uint32_t x48 = (x46 & 0xfffffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint64_t x50 = (x49 >> 0x1c);
+ { uint32_t x51 = ((uint32_t)x49 & 0xfffffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x1c);
+ { uint32_t x54 = ((uint32_t)x52 & 0xfffffff);
+ { uint64_t x55 = (x36 + (0xf * x53));
+ { uint32_t x56 = (uint32_t) (x55 >> 0x1c);
+ { uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
+ { uint32_t x58 = (x56 + x39);
+ { uint32_t x59 = (x58 >> 0x1c);
+ { uint32_t x60 = (x58 & 0xfffffff);
+ out[0] = x57;
+ out[1] = x60;
+ out[2] = (x59 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e196m15/fesquare.c b/src/Specific/solinas32_2e196m15/fesquare.c
index eb632a21d..08f1b982f 100644
--- a/src/Specific/solinas32_2e196m15/fesquare.c
+++ b/src/Specific/solinas32_2e196m15/fesquare.c
@@ -1,61 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x13 = (((uint64_t)x2 * x11) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x11 * x2)))))));
-{ uint64_t x14 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0xf * ((uint64_t)x11 * x11)));
-{ ℤ x15 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0xf *ℤ (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
-{ ℤ x16 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0xf *ℤ (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
-{ ℤ x17 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0xf *ℤ (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
-{ ℤ x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0xf *ℤ (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
-{ ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0xf *ℤ (((uint64_t)x4 * x11) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x11 * x4))))))));
-{ uint64_t x20 = (x19 >> 0x1c);
-{ uint32_t x21 = (x19 & 0xfffffff);
-{ ℤ x22 = (x20 +ℤ x18);
-{ uint64_t x23 = (x22 >> 0x1c);
-{ uint32_t x24 = (x22 & 0xfffffff);
-{ ℤ x25 = (x23 +ℤ x17);
-{ uint64_t x26 = (x25 >> 0x1c);
-{ uint32_t x27 = (x25 & 0xfffffff);
-{ ℤ x28 = (x26 +ℤ x16);
-{ uint64_t x29 = (x28 >> 0x1c);
-{ uint32_t x30 = (x28 & 0xfffffff);
-{ ℤ x31 = (x29 +ℤ x15);
-{ uint64_t x32 = (x31 >> 0x1c);
-{ uint32_t x33 = (x31 & 0xfffffff);
-{ uint64_t x34 = (x32 + x14);
-{ uint64_t x35 = (x34 >> 0x1c);
-{ uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
-{ uint64_t x37 = (x35 + x13);
-{ uint64_t x38 = (x37 >> 0x1c);
-{ uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
-{ uint64_t x40 = (x21 + (0xf * x38));
-{ uint32_t x41 = (uint32_t) (x40 >> 0x1c);
-{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
-{ uint32_t x43 = (x41 + x24);
-{ uint32_t x44 = (x43 >> 0x1c);
-{ uint32_t x45 = (x43 & 0xfffffff);
-out[0] = x39;
-out[1] = x36;
-out[2] = x33;
-out[3] = x30;
-out[4] = x44 + x27;
-out[5] = x45;
-out[6] = x42;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void fesquare(uint32_t out[7], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x13 = (((uint64_t)x2 * x11) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x11 * x2)))))));
+ { uint64_t x14 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0xf * ((uint64_t)x11 * x11)));
+ { ℤ x15 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0xf *ℤ (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
+ { ℤ x16 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0xf *ℤ (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
+ { ℤ x17 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0xf *ℤ (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
+ { ℤ x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0xf *ℤ (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
+ { ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0xf *ℤ (((uint64_t)x4 * x11) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x11 * x4))))))));
+ { uint64_t x20 = (x19 >> 0x1c);
+ { uint32_t x21 = (x19 & 0xfffffff);
+ { ℤ x22 = (x20 +ℤ x18);
+ { uint64_t x23 = (x22 >> 0x1c);
+ { uint32_t x24 = (x22 & 0xfffffff);
+ { ℤ x25 = (x23 +ℤ x17);
+ { uint64_t x26 = (x25 >> 0x1c);
+ { uint32_t x27 = (x25 & 0xfffffff);
+ { ℤ x28 = (x26 +ℤ x16);
+ { uint64_t x29 = (x28 >> 0x1c);
+ { uint32_t x30 = (x28 & 0xfffffff);
+ { ℤ x31 = (x29 +ℤ x15);
+ { uint64_t x32 = (x31 >> 0x1c);
+ { uint32_t x33 = (x31 & 0xfffffff);
+ { uint64_t x34 = (x32 + x14);
+ { uint64_t x35 = (x34 >> 0x1c);
+ { uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
+ { uint64_t x37 = (x35 + x13);
+ { uint64_t x38 = (x37 >> 0x1c);
+ { uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
+ { uint64_t x40 = (x21 + (0xf * x38));
+ { uint32_t x41 = (uint32_t) (x40 >> 0x1c);
+ { uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+ { uint32_t x43 = (x41 + x24);
+ { uint32_t x44 = (x43 >> 0x1c);
+ { uint32_t x45 = (x43 & 0xfffffff);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = (x44 + x27);
+ out[3] = x30;
+ out[4] = x33;
+ out[5] = x36;
+ out[6] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e196m15/freeze.c b/src/Specific/solinas32_2e196m15/freeze.c
index ebaee8ab2..8a06700b4 100644
--- a/src/Specific/solinas32_2e196m15/freeze.c
+++ b/src/Specific/solinas32_2e196m15/freeze.c
@@ -1,25 +1,39 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x14;
-out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff1;;
+static void freeze(uint32_t out[7], const uint32_t in1[7]) {
+ { const uint32_t x11 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff1);
+ { uint32_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x15, Return x4, 0xfffffff);
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x18, Return x6, 0xfffffff);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x8, 0xfffffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x10, 0xfffffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x12, 0xfffffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x11, 0xfffffff);
+ { uint32_t x34 = (uint32_t)cmovznz(x33, 0x0, 0xffffffff);
+ { uint32_t x35 = (x34 & 0xffffff1);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x14, Return x35);
+ { uint32_t x39 = (x34 & 0xfffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x17, Return x39);
+ { uint32_t x43 = (x34 & 0xfffffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x20, Return x43);
+ { uint32_t x47 = (x34 & 0xfffffff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x23, Return x47);
+ { uint32_t x51 = (x34 & 0xfffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x26, Return x51);
+ { uint32_t x55 = (x34 & 0xfffffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x29, Return x55);
+ { uint32_t x59 = (x34 & 0xfffffff);
+ { uint32_t x61, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x32, Return x59);
+ out[0] = x37;
+ out[1] = x41;
+ out[2] = x45;
+ out[3] = x49;
+ out[4] = x53;
+ out[5] = x57;
+ out[6] = x61;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e198m17/femul.c b/src/Specific/solinas32_2e198m17/femul.c
index 3de6c467f..fa8aadc0c 100644
--- a/src/Specific/solinas32_2e198m17/femul.c
+++ b/src/Specific/solinas32_2e198m17/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21)))))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x11 * ((uint64_t)x18 * x34)));
-{ uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x11 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
-{ uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x11 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
-{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x11 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x11 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x11 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
-{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x11 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
-{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x11 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));
-{ uint64_t x45 = (x44 >> 0x16);
-{ uint32_t x46 = ((uint32_t)x44 & 0x3fffff);
-{ uint64_t x47 = (x45 + x43);
-{ uint64_t x48 = (x47 >> 0x16);
-{ uint32_t x49 = ((uint32_t)x47 & 0x3fffff);
-{ uint64_t x50 = (x48 + x42);
-{ uint64_t x51 = (x50 >> 0x16);
-{ uint32_t x52 = ((uint32_t)x50 & 0x3fffff);
-{ uint64_t x53 = (x51 + x41);
-{ uint32_t x54 = (uint32_t) (x53 >> 0x16);
-{ uint32_t x55 = ((uint32_t)x53 & 0x3fffff);
-{ uint64_t x56 = (x54 + x40);
-{ uint32_t x57 = (uint32_t) (x56 >> 0x16);
-{ uint32_t x58 = ((uint32_t)x56 & 0x3fffff);
-{ uint64_t x59 = (x57 + x39);
-{ uint32_t x60 = (uint32_t) (x59 >> 0x16);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
-{ uint64_t x62 = (x60 + x38);
-{ uint32_t x63 = (uint32_t) (x62 >> 0x16);
-{ uint32_t x64 = ((uint32_t)x62 & 0x3fffff);
-{ uint64_t x65 = (x63 + x37);
-{ uint32_t x66 = (uint32_t) (x65 >> 0x16);
-{ uint32_t x67 = ((uint32_t)x65 & 0x3fffff);
-{ uint64_t x68 = (x66 + x36);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x16);
-{ uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
-{ uint64_t x71 = (x46 + ((uint64_t)0x11 * x69));
-{ uint32_t x72 = (uint32_t) (x71 >> 0x16);
-{ uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
-{ uint32_t x74 = (x72 + x49);
-{ uint32_t x75 = (x74 >> 0x16);
-{ uint32_t x76 = (x74 & 0x3fffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) {
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21)))))))));
+ { uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x11 * ((uint64_t)x18 * x34)));
+ { uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x11 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+ { uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x11 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+ { uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x11 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x11 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x11 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+ { uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x11 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+ { uint64_t x44 = (((uint64_t)x5 * x21) + (0x11 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));
+ { uint64_t x45 = (x44 >> 0x16);
+ { uint32_t x46 = ((uint32_t)x44 & 0x3fffff);
+ { uint64_t x47 = (x45 + x43);
+ { uint64_t x48 = (x47 >> 0x16);
+ { uint32_t x49 = ((uint32_t)x47 & 0x3fffff);
+ { uint64_t x50 = (x48 + x42);
+ { uint64_t x51 = (x50 >> 0x16);
+ { uint32_t x52 = ((uint32_t)x50 & 0x3fffff);
+ { uint64_t x53 = (x51 + x41);
+ { uint32_t x54 = (uint32_t) (x53 >> 0x16);
+ { uint32_t x55 = ((uint32_t)x53 & 0x3fffff);
+ { uint64_t x56 = (x54 + x40);
+ { uint32_t x57 = (uint32_t) (x56 >> 0x16);
+ { uint32_t x58 = ((uint32_t)x56 & 0x3fffff);
+ { uint64_t x59 = (x57 + x39);
+ { uint32_t x60 = (uint32_t) (x59 >> 0x16);
+ { uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
+ { uint64_t x62 = (x60 + x38);
+ { uint32_t x63 = (uint32_t) (x62 >> 0x16);
+ { uint32_t x64 = ((uint32_t)x62 & 0x3fffff);
+ { uint64_t x65 = (x63 + x37);
+ { uint32_t x66 = (uint32_t) (x65 >> 0x16);
+ { uint32_t x67 = ((uint32_t)x65 & 0x3fffff);
+ { uint64_t x68 = (x66 + x36);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x16);
+ { uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
+ { uint64_t x71 = (x46 + ((uint64_t)0x11 * x69));
+ { uint32_t x72 = (uint32_t) (x71 >> 0x16);
+ { uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
+ { uint32_t x74 = (x72 + x49);
+ { uint32_t x75 = (x74 >> 0x16);
+ { uint32_t x76 = (x74 & 0x3fffff);
+ out[0] = x73;
+ out[1] = x76;
+ out[2] = (x75 + x52);
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e198m17/fesquare.c b/src/Specific/solinas32_2e198m17/fesquare.c
index 3685d9d70..c146341e3 100644
--- a/src/Specific/solinas32_2e198m17/fesquare.c
+++ b/src/Specific/solinas32_2e198m17/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2)))))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * ((uint64_t)x15 * x15)));
-{ uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x11 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
-{ uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
-{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x11 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
-{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x11 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
-{ uint64_t x26 = (x25 >> 0x16);
-{ uint32_t x27 = ((uint32_t)x25 & 0x3fffff);
-{ uint64_t x28 = (x26 + x24);
-{ uint64_t x29 = (x28 >> 0x16);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3fffff);
-{ uint64_t x31 = (x29 + x23);
-{ uint64_t x32 = (x31 >> 0x16);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
-{ uint64_t x34 = (x32 + x22);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x16);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
-{ uint64_t x40 = (x38 + x20);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x16);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
-{ uint64_t x43 = (x41 + x19);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x16);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
-{ uint64_t x46 = (x44 + x18);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x16);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
-{ uint64_t x49 = (x47 + x17);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x16);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
-{ uint64_t x52 = (x27 + ((uint64_t)0x11 * x50));
-{ uint32_t x53 = (uint32_t) (x52 >> 0x16);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
-{ uint32_t x55 = (x53 + x30);
-{ uint32_t x56 = (x55 >> 0x16);
-{ uint32_t x57 = (x55 & 0x3fffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2)))))))));
+ { uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * ((uint64_t)x15 * x15)));
+ { uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x11 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+ { uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+ { uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+ { uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x11 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+ { uint64_t x25 = (((uint64_t)x2 * x2) + (0x11 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
+ { uint64_t x26 = (x25 >> 0x16);
+ { uint32_t x27 = ((uint32_t)x25 & 0x3fffff);
+ { uint64_t x28 = (x26 + x24);
+ { uint64_t x29 = (x28 >> 0x16);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3fffff);
+ { uint64_t x31 = (x29 + x23);
+ { uint64_t x32 = (x31 >> 0x16);
+ { uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
+ { uint64_t x34 = (x32 + x22);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x16);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x16);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+ { uint64_t x40 = (x38 + x20);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x16);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+ { uint64_t x43 = (x41 + x19);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x16);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+ { uint64_t x46 = (x44 + x18);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x16);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+ { uint64_t x49 = (x47 + x17);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x16);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+ { uint64_t x52 = (x27 + ((uint64_t)0x11 * x50));
+ { uint32_t x53 = (uint32_t) (x52 >> 0x16);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+ { uint32_t x55 = (x53 + x30);
+ { uint32_t x56 = (x55 >> 0x16);
+ { uint32_t x57 = (x55 & 0x3fffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33);
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e198m17/freeze.c b/src/Specific/solinas32_2e198m17/freeze.c
index 256e5606e..607572e4f 100644
--- a/src/Specific/solinas32_2e198m17/freeze.c
+++ b/src/Specific/solinas32_2e198m17/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffef;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) { /* freeze: reads the 9 limbs of in1, runs a 9-step borrow-chained subtraction of constants, then a 9-step masked carry-chained add-back, and writes 9 limbs to out. */
+ { const uint32_t x15 = in1[8]; /* locals are loaded from in1[8] down to in1[0] */
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffef); /* NOTE(review): "Op (Syntax.SubWithGetBorrow ...) (...)" is not C syntax and will not compile as written; it looks like an operator the pretty-printer left unlowered -- confirm against the generator. Arguments here: (borrow-in 0x0, x2, constant 0x3fffef). */
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0x3fffff); /* from here on each step takes the previous step's second result (x19, x22, ...) as its first argument and uses the constant 0x3fffff */
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0x3fffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0x3fffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0x3fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0x3fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0x3fffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0x3fffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0x3fffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff); /* x44 is chosen between 0x0 and 0xffffffff based on the final x43; cmovznz is defined outside this block -- presumably it yields 0xffffffff when x43 is nonzero, TODO confirm */
+ { uint32_t x45 = (x44 & 0x3fffef); /* x44 is ANDed with the same constants that were subtracted above (0x3fffef here, 0x3fffff below) */
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45); /* add-back chain starts with first argument 0x0; same non-C "Op (...)" form as the subtraction steps */
+ { uint32_t x49 = (x44 & 0x3fffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0x3fffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0x3fffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0x3fffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0x3fffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0x3fffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0x3fffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0x3fffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77); /* the second result of the last step is bound to _ and never read */
+ out[0] = x47; /* out[i] receives the add-back result of limb i: in1[0] -> x2 -> x18 -> x47, and so on up to in1[8] -> x79 */
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e205m45x2e198m1/freeze.c b/src/Specific/solinas32_2e205m45x2e198m1/freeze.c
index f34680b5a..133001801 100644
--- a/src/Specific/solinas32_2e205m45x2e198m1/freeze.c
+++ b/src/Specific/solinas32_2e205m45x2e198m1/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 21 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffff;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) { /* freeze: reads the 10 limbs of in1, runs a 10-step borrow-chained subtraction of constants, then a 10-step masked carry-chained add-back, and writes 10 limbs to out. */
+ { const uint32_t x17 = in1[9]; /* locals are loaded from in1[9] down to in1[0] */
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffff); /* NOTE(review): "Op (Syntax.SubWithGetBorrow ...) (...)" is not C syntax and will not compile as written; it looks like an operator the pretty-printer left unlowered -- confirm against the generator. Arguments here: (borrow-in 0x0, x2, constant 0x1fffff). */
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0xfffff); /* steps alternate between width argument 21 with constant 0x1fffff and width argument 20 with constant 0xfffff; each takes the previous step's second result as its first argument */
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x1fffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0xfffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x1fffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0xfffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x1fffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0xfffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x1fffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0xa5fff); /* the last limb (in1[9]) uses the constant 0xa5fff rather than 0xfffff */
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff); /* x49 is chosen between 0x0 and 0xffffffff based on the final x48; cmovznz is defined outside this block -- presumably it yields 0xffffffff when x48 is nonzero, TODO confirm */
+ { uint32_t x50 = (x49 & 0x1fffff); /* x49 is ANDed with the same per-limb constants that were subtracted above */
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50); /* add-back chain starts with first argument 0x0; same non-C "Op (...)" form as the subtraction steps */
+ { uint32_t x54 = (x49 & 0xfffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x1fffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0xfffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x1fffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0xfffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x1fffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0xfffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x1fffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0xa5fff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86); /* the second result of the last step is bound to _ and never read */
+ out[0] = x52; /* out[i] receives the add-back result of limb i: in1[0] -> x2 -> x20 -> x52, and so on up to in1[9] -> x88 */
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e206m5/femul.c b/src/Specific/solinas32_2e206m5/femul.c
index a3f804790..131af4e31 100644
--- a/src/Specific/solinas32_2e206m5/femul.c
+++ b/src/Specific/solinas32_2e206m5/femul.c
@@ -1,86 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + ((0x2 * ((uint64_t)x21 * x33)) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x5 * ((uint64_t)x24 * x46)));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0x5 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x5 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x5 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x5 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
-{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0x5 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
-{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x5 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
-{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x5 * (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x5 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x5 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
-{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x5 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
-{ uint32_t x60 = (uint32_t) (x59 >> 0x12);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3ffff);
-{ uint64_t x62 = (x60 + x58);
-{ uint32_t x63 = (uint32_t) (x62 >> 0x11);
-{ uint32_t x64 = ((uint32_t)x62 & 0x1ffff);
-{ uint64_t x65 = (x63 + x57);
-{ uint32_t x66 = (uint32_t) (x65 >> 0x11);
-{ uint32_t x67 = ((uint32_t)x65 & 0x1ffff);
-{ uint64_t x68 = (x66 + x56);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x11);
-{ uint32_t x70 = ((uint32_t)x68 & 0x1ffff);
-{ uint64_t x71 = (x69 + x55);
-{ uint32_t x72 = (uint32_t) (x71 >> 0x11);
-{ uint32_t x73 = ((uint32_t)x71 & 0x1ffff);
-{ uint64_t x74 = (x72 + x54);
-{ uint32_t x75 = (uint32_t) (x74 >> 0x11);
-{ uint32_t x76 = ((uint32_t)x74 & 0x1ffff);
-{ uint64_t x77 = (x75 + x53);
-{ uint32_t x78 = (uint32_t) (x77 >> 0x12);
-{ uint32_t x79 = ((uint32_t)x77 & 0x3ffff);
-{ uint64_t x80 = (x78 + x52);
-{ uint32_t x81 = (uint32_t) (x80 >> 0x11);
-{ uint32_t x82 = ((uint32_t)x80 & 0x1ffff);
-{ uint64_t x83 = (x81 + x51);
-{ uint32_t x84 = (uint32_t) (x83 >> 0x11);
-{ uint32_t x85 = ((uint32_t)x83 & 0x1ffff);
-{ uint64_t x86 = (x84 + x50);
-{ uint32_t x87 = (uint32_t) (x86 >> 0x11);
-{ uint32_t x88 = ((uint32_t)x86 & 0x1ffff);
-{ uint64_t x89 = (x87 + x49);
-{ uint32_t x90 = (uint32_t) (x89 >> 0x11);
-{ uint32_t x91 = ((uint32_t)x89 & 0x1ffff);
-{ uint64_t x92 = (x90 + x48);
-{ uint32_t x93 = (uint32_t) (x92 >> 0x11);
-{ uint32_t x94 = ((uint32_t)x92 & 0x1ffff);
-{ uint32_t x95 = (x61 + (0x5 * x93));
-{ uint32_t x96 = (x95 >> 0x12);
-{ uint32_t x97 = (x95 & 0x3ffff);
-{ uint32_t x98 = (x96 + x64);
-{ uint32_t x99 = (x98 >> 0x11);
-{ uint32_t x100 = (x98 & 0x1ffff);
-out[0] = x94;
-out[1] = x91;
-out[2] = x88;
-out[3] = x85;
-out[4] = x82;
-out[5] = x79;
-out[6] = x76;
-out[7] = x73;
-out[8] = x70;
-out[9] = x99 + x67;
-out[10] = x100;
-out[11] = x97;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) { /* femul: reads 12 limbs from each of in1 and in2, builds twelve 64-bit accumulators (x48..x59) from limb products, runs a carry chain over them, and writes 12 limbs to out. */
+ { const uint32_t x24 = in1[11]; /* in1 limbs are loaded from index 11 down to 0 */
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11]; /* in2 limbs are loaded from index 11 down to 0 */
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + ((0x2 * ((uint64_t)x21 * x33)) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); /* x48: products in1[i]*in2[j] with i + j == 11 (some doubled); each product is widened to 64 bits by the cast on its left operand */
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x5 * ((uint64_t)x24 * x46))); /* x49: products with i + j == 10, plus 0x5 times the in1[11]*in2[11] product; x50..x58 below follow the same pattern with a growing 0x5-scaled part */
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0x5 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x5 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x5 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x5 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
+ { uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0x5 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
+ { uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x5 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+ { uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x5 * (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x5 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x5 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+ { uint64_t x59 = (((uint64_t)x5 * x27) + (0x5 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); /* x59: in1[0]*in2[0] plus 0x5 times the products with i + j == 12 */
+ { uint32_t x60 = (uint32_t) (x59 >> 0x12); /* carry chain starts at x59: the part above the low 0x12 (18) bits is carried into the next accumulator */
+ { uint32_t x61 = ((uint32_t)x59 & 0x3ffff); /* low 18 bits kept for this limb */
+ { uint64_t x62 = (x60 + x58);
+ { uint32_t x63 = (uint32_t) (x62 >> 0x11); /* most steps split at 0x11 (17) bits with mask 0x1ffff */
+ { uint32_t x64 = ((uint32_t)x62 & 0x1ffff);
+ { uint64_t x65 = (x63 + x57);
+ { uint32_t x66 = (uint32_t) (x65 >> 0x11);
+ { uint32_t x67 = ((uint32_t)x65 & 0x1ffff);
+ { uint64_t x68 = (x66 + x56);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x11);
+ { uint32_t x70 = ((uint32_t)x68 & 0x1ffff);
+ { uint64_t x71 = (x69 + x55);
+ { uint32_t x72 = (uint32_t) (x71 >> 0x11);
+ { uint32_t x73 = ((uint32_t)x71 & 0x1ffff);
+ { uint64_t x74 = (x72 + x54);
+ { uint32_t x75 = (uint32_t) (x74 >> 0x11);
+ { uint32_t x76 = ((uint32_t)x74 & 0x1ffff);
+ { uint64_t x77 = (x75 + x53);
+ { uint32_t x78 = (uint32_t) (x77 >> 0x12); /* this step (accumulator x53) splits at 0x12 bits, like the first step, instead of 0x11 */
+ { uint32_t x79 = ((uint32_t)x77 & 0x3ffff);
+ { uint64_t x80 = (x78 + x52);
+ { uint32_t x81 = (uint32_t) (x80 >> 0x11);
+ { uint32_t x82 = ((uint32_t)x80 & 0x1ffff);
+ { uint64_t x83 = (x81 + x51);
+ { uint32_t x84 = (uint32_t) (x83 >> 0x11);
+ { uint32_t x85 = ((uint32_t)x83 & 0x1ffff);
+ { uint64_t x86 = (x84 + x50);
+ { uint32_t x87 = (uint32_t) (x86 >> 0x11);
+ { uint32_t x88 = ((uint32_t)x86 & 0x1ffff);
+ { uint64_t x89 = (x87 + x49);
+ { uint32_t x90 = (uint32_t) (x89 >> 0x11);
+ { uint32_t x91 = ((uint32_t)x89 & 0x1ffff);
+ { uint64_t x92 = (x90 + x48);
+ { uint32_t x93 = (uint32_t) (x92 >> 0x11);
+ { uint32_t x94 = ((uint32_t)x92 & 0x1ffff);
+ { uint32_t x95 = (x61 + (0x5 * x93)); /* the carry out of the last accumulator (x93) is scaled by 0x5 and added to the first limb x61; this sum is done in 32-bit arithmetic */
+ { uint32_t x96 = (x95 >> 0x12);
+ { uint32_t x97 = (x95 & 0x3ffff);
+ { uint32_t x98 = (x96 + x64); /* one more carry step from the first limb into the second */
+ { uint32_t x99 = (x98 >> 0x11);
+ { uint32_t x100 = (x98 & 0x1ffff);
+ out[0] = x97; /* out[0] is the re-masked first limb; out[11] is the limb taken from x48's accumulator */
+ out[1] = x100;
+ out[2] = (x99 + x67); /* the final carry x99 is added to x67 and stored without a further mask */
+ out[3] = x70;
+ out[4] = x73;
+ out[5] = x76;
+ out[6] = x79;
+ out[7] = x82;
+ out[8] = x85;
+ out[9] = x88;
+ out[10] = x91;
+ out[11] = x94;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e206m5/fesquare.c b/src/Specific/solinas32_2e206m5/fesquare.c
index fb8fba0cc..d2fc4606a 100644
--- a/src/Specific/solinas32_2e206m5/fesquare.c
+++ b/src/Specific/solinas32_2e206m5/fesquare.c
@@ -1,86 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x5 * ((uint64_t)x21 * x21)));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x5 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x5 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x5 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + ((0x2 * ((uint64_t)x20 * x20)) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + ((0x2 * ((uint64_t)x18 * x20)) + ((0x2 * ((uint64_t)x20 * x18)) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x5 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
-{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x5 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x12);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3ffff);
-{ uint64_t x37 = (x35 + x33);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x11);
-{ uint32_t x39 = ((uint32_t)x37 & 0x1ffff);
-{ uint64_t x40 = (x38 + x32);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x11);
-{ uint32_t x42 = ((uint32_t)x40 & 0x1ffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x11);
-{ uint32_t x45 = ((uint32_t)x43 & 0x1ffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x11);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1ffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x11);
-{ uint32_t x51 = ((uint32_t)x49 & 0x1ffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x12);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3ffff);
-{ uint64_t x55 = (x53 + x27);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x11);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1ffff);
-{ uint64_t x58 = (x56 + x26);
-{ uint32_t x59 = (uint32_t) (x58 >> 0x11);
-{ uint32_t x60 = ((uint32_t)x58 & 0x1ffff);
-{ uint64_t x61 = (x59 + x25);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x11);
-{ uint32_t x63 = ((uint32_t)x61 & 0x1ffff);
-{ uint64_t x64 = (x62 + x24);
-{ uint32_t x65 = (uint32_t) (x64 >> 0x11);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1ffff);
-{ uint64_t x67 = (x65 + x23);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x11);
-{ uint32_t x69 = ((uint32_t)x67 & 0x1ffff);
-{ uint32_t x70 = (x36 + (0x5 * x68));
-{ uint32_t x71 = (x70 >> 0x12);
-{ uint32_t x72 = (x70 & 0x3ffff);
-{ uint32_t x73 = (x71 + x39);
-{ uint32_t x74 = (x73 >> 0x11);
-{ uint32_t x75 = (x73 & 0x1ffff);
-out[0] = x69;
-out[1] = x66;
-out[2] = x63;
-out[3] = x60;
-out[4] = x57;
-out[5] = x54;
-out[6] = x51;
-out[7] = x48;
-out[8] = x45;
-out[9] = x74 + x42;
-out[10] = x75;
-out[11] = x72;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x5 * ((uint64_t)x21 * x21)));
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x5 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x5 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x5 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + ((0x2 * ((uint64_t)x20 * x20)) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + ((0x2 * ((uint64_t)x18 * x20)) + ((0x2 * ((uint64_t)x20 * x18)) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x5 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+ { uint64_t x34 = (((uint64_t)x2 * x2) + (0x5 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x12);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3ffff);
+ { uint64_t x37 = (x35 + x33);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x11);
+ { uint32_t x39 = ((uint32_t)x37 & 0x1ffff);
+ { uint64_t x40 = (x38 + x32);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x11);
+ { uint32_t x42 = ((uint32_t)x40 & 0x1ffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x11);
+ { uint32_t x45 = ((uint32_t)x43 & 0x1ffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x11);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1ffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x11);
+ { uint32_t x51 = ((uint32_t)x49 & 0x1ffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x12);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3ffff);
+ { uint64_t x55 = (x53 + x27);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x11);
+ { uint32_t x57 = ((uint32_t)x55 & 0x1ffff);
+ { uint64_t x58 = (x56 + x26);
+ { uint32_t x59 = (uint32_t) (x58 >> 0x11);
+ { uint32_t x60 = ((uint32_t)x58 & 0x1ffff);
+ { uint64_t x61 = (x59 + x25);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x11);
+ { uint32_t x63 = ((uint32_t)x61 & 0x1ffff);
+ { uint64_t x64 = (x62 + x24);
+ { uint32_t x65 = (uint32_t) (x64 >> 0x11);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1ffff);
+ { uint64_t x67 = (x65 + x23);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x11);
+ { uint32_t x69 = ((uint32_t)x67 & 0x1ffff);
+ { uint32_t x70 = (x36 + (0x5 * x68));
+ { uint32_t x71 = (x70 >> 0x12);
+ { uint32_t x72 = (x70 & 0x3ffff);
+ { uint32_t x73 = (x71 + x39);
+ { uint32_t x74 = (x73 >> 0x11);
+ { uint32_t x75 = (x73 & 0x1ffff);
+ out[0] = x72;
+ out[1] = x75;
+ out[2] = (x74 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ out[7] = x57;
+ out[8] = x60;
+ out[9] = x63;
+ out[10] = x66;
+ out[11] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e206m5/freeze.c b/src/Specific/solinas32_2e206m5/freeze.c
index 77a911340..a573bc45c 100644
--- a/src/Specific/solinas32_2e206m5/freeze.c
+++ b/src/Specific/solinas32_2e206m5/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 18 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffb;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffb);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x1ffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x1ffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x1ffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x1ffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0x1ffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0x3ffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0x1ffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x1ffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0x1ffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x1ffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x1ffff);
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff);
+ { uint32_t x60 = (x59 & 0x3fffb);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60);
+ { uint32_t x64 = (x59 & 0x1ffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0x1ffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x1ffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x1ffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0x1ffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0x3ffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0x1ffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x1ffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0x1ffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0x1ffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0x1ffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 17 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104);
+ out[0] = x62;
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e212m29/femul.c b/src/Specific/solinas32_2e212m29/femul.c
index 56a5f72be..bdce0055e 100644
--- a/src/Specific/solinas32_2e212m29/femul.c
+++ b/src/Specific/solinas32_2e212m29/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x32 = (((uint64_t)x5 * x30) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + (((uint64_t)x17 * x21) + ((uint64_t)x16 * x19))))))));
-{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + ((0x2 * ((uint64_t)x11 * x25)) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x1d * (0x2 * ((uint64_t)x16 * x30))));
-{ uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x1d * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
-{ uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0x1d * ((0x2 * ((uint64_t)x15 * x30)) + (((uint64_t)x17 * x31) + (0x2 * ((uint64_t)x16 * x29))))));
-{ uint64_t x36 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) + (0x1d * (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27))))));
-{ ℤ x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x11 * x30)) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + (0x2 * ((uint64_t)x16 * x25))))))));
-{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0x1d * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
-{ ℤ x39 = (((uint64_t)x5 * x19) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x7 * x30)) + (((uint64_t)x9 * x31) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + (((uint64_t)x17 * x23) + (0x2 * ((uint64_t)x16 * x21))))))))));
-{ uint64_t x40 = (x39 >> 0x1b);
-{ uint32_t x41 = (x39 & 0x7ffffff);
-{ uint64_t x42 = (x40 + x38);
-{ uint64_t x43 = (x42 >> 0x1a);
-{ uint32_t x44 = ((uint32_t)x42 & 0x3ffffff);
-{ ℤ x45 = (x43 +ℤ x37);
-{ uint64_t x46 = (x45 >> 0x1b);
-{ uint32_t x47 = (x45 & 0x7ffffff);
-{ uint64_t x48 = (x46 + x36);
-{ uint64_t x49 = (x48 >> 0x1a);
-{ uint32_t x50 = ((uint32_t)x48 & 0x3ffffff);
-{ uint64_t x51 = (x49 + x35);
-{ uint64_t x52 = (x51 >> 0x1b);
-{ uint32_t x53 = ((uint32_t)x51 & 0x7ffffff);
-{ uint64_t x54 = (x52 + x34);
-{ uint64_t x55 = (x54 >> 0x1a);
-{ uint32_t x56 = ((uint32_t)x54 & 0x3ffffff);
-{ uint64_t x57 = (x55 + x33);
-{ uint64_t x58 = (x57 >> 0x1b);
-{ uint32_t x59 = ((uint32_t)x57 & 0x7ffffff);
-{ uint64_t x60 = (x58 + x32);
-{ uint64_t x61 = (x60 >> 0x1a);
-{ uint32_t x62 = ((uint32_t)x60 & 0x3ffffff);
-{ uint64_t x63 = (x41 + (0x1d * x61));
-{ uint32_t x64 = (uint32_t) (x63 >> 0x1b);
-{ uint32_t x65 = ((uint32_t)x63 & 0x7ffffff);
-{ uint32_t x66 = (x64 + x44);
-{ uint32_t x67 = (x66 >> 0x1a);
-{ uint32_t x68 = (x66 & 0x3ffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint64_t x32 = (((uint64_t)x5 * x30) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + (((uint64_t)x17 * x21) + ((uint64_t)x16 * x19))))))));
+ { uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + ((0x2 * ((uint64_t)x11 * x25)) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x1d * (0x2 * ((uint64_t)x16 * x30))));
+ { uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x1d * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+ { uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0x1d * ((0x2 * ((uint64_t)x15 * x30)) + (((uint64_t)x17 * x31) + (0x2 * ((uint64_t)x16 * x29))))));
+ { uint64_t x36 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) + (0x1d * (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27))))));
+ { ℤ x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x11 * x30)) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + (0x2 * ((uint64_t)x16 * x25))))))));
+ { uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0x1d * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+ { ℤ x39 = (((uint64_t)x5 * x19) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x7 * x30)) + (((uint64_t)x9 * x31) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + (((uint64_t)x17 * x23) + (0x2 * ((uint64_t)x16 * x21))))))))));
+ { uint64_t x40 = (x39 >> 0x1b);
+ { uint32_t x41 = (x39 & 0x7ffffff);
+ { uint64_t x42 = (x40 + x38);
+ { uint64_t x43 = (x42 >> 0x1a);
+ { uint32_t x44 = ((uint32_t)x42 & 0x3ffffff);
+ { ℤ x45 = (x43 +ℤ x37);
+ { uint64_t x46 = (x45 >> 0x1b);
+ { uint32_t x47 = (x45 & 0x7ffffff);
+ { uint64_t x48 = (x46 + x36);
+ { uint64_t x49 = (x48 >> 0x1a);
+ { uint32_t x50 = ((uint32_t)x48 & 0x3ffffff);
+ { uint64_t x51 = (x49 + x35);
+ { uint64_t x52 = (x51 >> 0x1b);
+ { uint32_t x53 = ((uint32_t)x51 & 0x7ffffff);
+ { uint64_t x54 = (x52 + x34);
+ { uint64_t x55 = (x54 >> 0x1a);
+ { uint32_t x56 = ((uint32_t)x54 & 0x3ffffff);
+ { uint64_t x57 = (x55 + x33);
+ { uint64_t x58 = (x57 >> 0x1b);
+ { uint32_t x59 = ((uint32_t)x57 & 0x7ffffff);
+ { uint64_t x60 = (x58 + x32);
+ { uint64_t x61 = (x60 >> 0x1a);
+ { uint32_t x62 = ((uint32_t)x60 & 0x3ffffff);
+ { uint64_t x63 = (x41 + (0x1d * x61));
+ { uint32_t x64 = (uint32_t) (x63 >> 0x1b);
+ { uint32_t x65 = ((uint32_t)x63 & 0x7ffffff);
+ { uint32_t x66 = (x64 + x44);
+ { uint32_t x67 = (x66 >> 0x1a);
+ { uint32_t x68 = (x66 & 0x3ffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e212m29/fesquare.c b/src/Specific/solinas32_2e212m29/fesquare.c
index a7ef68a8d..0ee4a8a9e 100644
--- a/src/Specific/solinas32_2e212m29/fesquare.c
+++ b/src/Specific/solinas32_2e212m29/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (((uint64_t)x2 * x13) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x13 * x2))))))));
-{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1d * (0x2 * ((uint64_t)x13 * x13))));
-{ uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x1d * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
-{ uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1d * ((0x2 * ((uint64_t)x12 * x13)) + (((uint64_t)x14 * x14) + (0x2 * ((uint64_t)x13 * x12))))));
-{ uint64_t x19 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x1d * (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10))))));
-{ ℤ x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x8 * x13)) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (0x2 * ((uint64_t)x13 * x8))))))));
-{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1d * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
-{ ℤ x22 = (((uint64_t)x2 * x2) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x4 * x13)) + (((uint64_t)x6 * x14) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + (((uint64_t)x14 * x6) + (0x2 * ((uint64_t)x13 * x4))))))))));
-{ uint64_t x23 = (x22 >> 0x1b);
-{ uint32_t x24 = (x22 & 0x7ffffff);
-{ uint64_t x25 = (x23 + x21);
-{ uint64_t x26 = (x25 >> 0x1a);
-{ uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
-{ ℤ x28 = (x26 +ℤ x20);
-{ uint64_t x29 = (x28 >> 0x1b);
-{ uint32_t x30 = (x28 & 0x7ffffff);
-{ uint64_t x31 = (x29 + x19);
-{ uint64_t x32 = (x31 >> 0x1a);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
-{ uint64_t x34 = (x32 + x18);
-{ uint64_t x35 = (x34 >> 0x1b);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
-{ uint64_t x37 = (x35 + x17);
-{ uint64_t x38 = (x37 >> 0x1a);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
-{ uint64_t x40 = (x38 + x16);
-{ uint64_t x41 = (x40 >> 0x1b);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
-{ uint64_t x43 = (x41 + x15);
-{ uint64_t x44 = (x43 >> 0x1a);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
-{ uint64_t x46 = (x24 + (0x1d * x44));
-{ uint32_t x47 = (uint32_t) (x46 >> 0x1b);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
-{ uint32_t x49 = (x47 + x27);
-{ uint32_t x50 = (x49 >> 0x1a);
-{ uint32_t x51 = (x49 & 0x3ffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x15 = (((uint64_t)x2 * x13) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x13 * x2))))))));
+ { uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1d * (0x2 * ((uint64_t)x13 * x13))));
+ { uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x1d * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+ { uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1d * ((0x2 * ((uint64_t)x12 * x13)) + (((uint64_t)x14 * x14) + (0x2 * ((uint64_t)x13 * x12))))));
+ { uint64_t x19 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x1d * (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10))))));
+ { ℤ x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x8 * x13)) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (0x2 * ((uint64_t)x13 * x8))))))));
+ { uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1d * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+ { ℤ x22 = (((uint64_t)x2 * x2) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x4 * x13)) + (((uint64_t)x6 * x14) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + (((uint64_t)x14 * x6) + (0x2 * ((uint64_t)x13 * x4))))))))));
+ { uint64_t x23 = (x22 >> 0x1b);
+ { uint32_t x24 = (x22 & 0x7ffffff);
+ { uint64_t x25 = (x23 + x21);
+ { uint64_t x26 = (x25 >> 0x1a);
+ { uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
+ { ℤ x28 = (x26 +ℤ x20);
+ { uint64_t x29 = (x28 >> 0x1b);
+ { uint32_t x30 = (x28 & 0x7ffffff);
+ { uint64_t x31 = (x29 + x19);
+ { uint64_t x32 = (x31 >> 0x1a);
+ { uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
+ { uint64_t x34 = (x32 + x18);
+ { uint64_t x35 = (x34 >> 0x1b);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+ { uint64_t x37 = (x35 + x17);
+ { uint64_t x38 = (x37 >> 0x1a);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
+ { uint64_t x40 = (x38 + x16);
+ { uint64_t x41 = (x40 >> 0x1b);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+ { uint64_t x43 = (x41 + x15);
+ { uint64_t x44 = (x43 >> 0x1a);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
+ { uint64_t x46 = (x24 + (0x1d * x44));
+ { uint32_t x47 = (uint32_t) (x46 >> 0x1b);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+ { uint32_t x49 = (x47 + x27);
+ { uint32_t x50 = (x49 >> 0x1a);
+ { uint32_t x51 = (x49 & 0x3ffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e212m29/freeze.c b/src/Specific/solinas32_2e212m29/freeze.c
index 0e748bcc8..a26e85d79 100644
--- a/src/Specific/solinas32_2e212m29/freeze.c
+++ b/src/Specific/solinas32_2e212m29/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffe3;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffe3);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x4, 0x3ffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x6, 0x7ffffff);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0x3ffffff);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0x7ffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0x3ffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0x7ffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0x3ffffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff);
+ { uint32_t x40 = (x39 & 0x7ffffe3);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x44 = (x39 & 0x3ffffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint32_t x48 = (x39 & 0x7ffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint32_t x52 = (x39 & 0x3ffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint32_t x56 = (x39 & 0x7ffffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint32_t x60 = (x39 & 0x3ffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint32_t x64 = (x39 & 0x7ffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint32_t x68 = (x39 & 0x3ffffff);
+ { uint32_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e213m3/femul.c b/src/Specific/solinas32_2e213m3/femul.c
index 4d09e2dc4..897023ace 100644
--- a/src/Specific/solinas32_2e213m3/femul.c
+++ b/src/Specific/solinas32_2e213m3/femul.c
@@ -1,101 +1,113 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
-{ uint64_t x60 = (((uint64_t)x5 * x58) + (((uint64_t)0x2 * (x7 * x59)) + (((uint64_t)0x2 * (x9 * x57)) + (((uint64_t)0x2 * (x11 * x55)) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)0x2 * (x17 * x49)) + (((uint64_t)0x2 * (x19 * x47)) + (((uint64_t)0x2 * (x21 * x45)) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)0x2 * (x27 * x39)) + (((uint64_t)0x2 * (x29 * x37)) + (((uint64_t)0x2 * (x31 * x35)) + ((uint64_t)x30 * x33)))))))))))))));
-{ uint64_t x61 = ((((uint64_t)x5 * x59) + (((uint64_t)0x2 * (x7 * x57)) + (((uint64_t)0x2 * (x9 * x55)) + (((uint64_t)x11 * x53) + ((x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)0x2 * (x17 * x47)) + (((uint64_t)0x2 * (x19 * x45)) + (((uint64_t)x21 * x43) + ((x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)0x2 * (x27 * x37)) + (((uint64_t)0x2 * (x29 * x35)) + ((uint64_t)x31 * x33)))))))))))))) + ((uint64_t)0x3 * (x30 * x58)));
-{ uint64_t x62 = ((((uint64_t)x5 * x57) + (((uint64_t)0x2 * (x7 * x55)) + (((uint64_t)x9 * x53) + ((x11 * x51) + ((x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)0x2 * (x17 * x45)) + (((uint64_t)x19 * x43) + ((x21 * x41) + ((x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)0x2 * (x27 * x35)) + ((uint64_t)x29 * x33))))))))))))) + (0x3 * ((uint64_t)(x31 * x58) + (x30 * x59))));
-{ uint64_t x63 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + ((x9 * x51) + ((x11 * x49) + ((x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((x19 * x41) + ((x21 * x39) + ((x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x27 * x33)))))))))))) + (0x3 * ((x29 * x58) + ((uint64_t)(x31 * x59) + (x30 * x57)))));
-{ uint64_t x64 = ((((uint64_t)x5 * x53) + (((uint64_t)0x2 * (x7 * x51)) + (((uint64_t)0x2 * (x9 * x49)) + (((uint64_t)0x2 * (x11 * x47)) + (((uint64_t)0x2 * (x13 * x45)) + (((uint64_t)x15 * x43) + (((uint64_t)0x2 * (x17 * x41)) + (((uint64_t)0x2 * (x19 * x39)) + (((uint64_t)0x2 * (x21 * x37)) + (((uint64_t)0x2 * (x23 * x35)) + ((uint64_t)x25 * x33))))))))))) + (0x3 * (((uint64_t)0x2 * (x27 * x58)) + (((uint64_t)0x2 * (x29 * x59)) + (((uint64_t)0x2 * (x31 * x57)) + ((uint64_t)0x2 * (x30 * x55)))))));
-{ uint64_t x65 = ((((uint64_t)x5 * x51) + (((uint64_t)0x2 * (x7 * x49)) + (((uint64_t)0x2 * (x9 * x47)) + (((uint64_t)0x2 * (x11 * x45)) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)0x2 * (x17 * x39)) + (((uint64_t)0x2 * (x19 * x37)) + (((uint64_t)0x2 * (x21 * x35)) + ((uint64_t)x23 * x33)))))))))) + (0x3 * (((uint64_t)x25 * x58) + (((uint64_t)0x2 * (x27 * x59)) + (((uint64_t)0x2 * (x29 * x57)) + (((uint64_t)0x2 * (x31 * x55)) + ((uint64_t)x30 * x53)))))));
-{ uint64_t x66 = ((((uint64_t)x5 * x49) + (((uint64_t)0x2 * (x7 * x47)) + (((uint64_t)0x2 * (x9 * x45)) + (((uint64_t)x11 * x43) + ((x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)0x2 * (x17 * x37)) + (((uint64_t)0x2 * (x19 * x35)) + ((uint64_t)x21 * x33))))))))) + (0x3 * ((x23 * x58) + (((uint64_t)x25 * x59) + (((uint64_t)0x2 * (x27 * x57)) + (((uint64_t)0x2 * (x29 * x55)) + (((uint64_t)x31 * x53) + (x30 * x51))))))));
-{ uint64_t x67 = ((((uint64_t)x5 * x47) + (((uint64_t)0x2 * (x7 * x45)) + (((uint64_t)x9 * x43) + ((x11 * x41) + ((x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)0x2 * (x17 * x35)) + ((uint64_t)x19 * x33)))))))) + (0x3 * ((x21 * x58) + ((x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)0x2 * (x27 * x55)) + (((uint64_t)x29 * x53) + ((uint64_t)(x31 * x51) + (x30 * x49)))))))));
-{ uint64_t x68 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + ((x9 * x41) + ((x11 * x39) + ((x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33))))))) + (0x3 * ((x19 * x58) + ((x21 * x59) + ((x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((x29 * x51) + ((uint64_t)(x31 * x49) + (x30 * x47))))))))));
-{ uint64_t x69 = ((((uint64_t)x5 * x43) + (((uint64_t)0x2 * (x7 * x41)) + (((uint64_t)0x2 * (x9 * x39)) + (((uint64_t)0x2 * (x11 * x37)) + (((uint64_t)0x2 * (x13 * x35)) + ((uint64_t)x15 * x33)))))) + (0x3 * (((uint64_t)0x2 * (x17 * x58)) + (((uint64_t)0x2 * (x19 * x59)) + (((uint64_t)0x2 * (x21 * x57)) + (((uint64_t)0x2 * (x23 * x55)) + (((uint64_t)x25 * x53) + (((uint64_t)0x2 * (x27 * x51)) + (((uint64_t)0x2 * (x29 * x49)) + (((uint64_t)0x2 * (x31 * x47)) + ((uint64_t)0x2 * (x30 * x45))))))))))));
-{ uint64_t x70 = ((((uint64_t)x5 * x41) + (((uint64_t)0x2 * (x7 * x39)) + (((uint64_t)0x2 * (x9 * x37)) + (((uint64_t)0x2 * (x11 * x35)) + ((uint64_t)x13 * x33))))) + (0x3 * (((uint64_t)x15 * x58) + (((uint64_t)0x2 * (x17 * x59)) + (((uint64_t)0x2 * (x19 * x57)) + (((uint64_t)0x2 * (x21 * x55)) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)0x2 * (x27 * x49)) + (((uint64_t)0x2 * (x29 * x47)) + (((uint64_t)0x2 * (x31 * x45)) + ((uint64_t)x30 * x43))))))))))));
-{ uint64_t x71 = ((((uint64_t)x5 * x39) + (((uint64_t)0x2 * (x7 * x37)) + (((uint64_t)0x2 * (x9 * x35)) + ((uint64_t)x11 * x33)))) + (0x3 * ((x13 * x58) + (((uint64_t)x15 * x59) + (((uint64_t)0x2 * (x17 * x57)) + (((uint64_t)0x2 * (x19 * x55)) + (((uint64_t)x21 * x53) + ((x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)0x2 * (x27 * x47)) + (((uint64_t)0x2 * (x29 * x45)) + (((uint64_t)x31 * x43) + (x30 * x41)))))))))))));
-{ uint64_t x72 = ((((uint64_t)x5 * x37) + (((uint64_t)0x2 * (x7 * x35)) + ((uint64_t)x9 * x33))) + (0x3 * ((x11 * x58) + ((x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)0x2 * (x17 * x55)) + (((uint64_t)x19 * x53) + ((x21 * x51) + ((x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)0x2 * (x27 * x45)) + (((uint64_t)x29 * x43) + ((uint64_t)(x31 * x41) + (x30 * x39))))))))))))));
-{ uint64_t x73 = ((((uint64_t)x5 * x35) + ((uint64_t)x7 * x33)) + (0x3 * ((x9 * x58) + ((x11 * x59) + ((x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((x19 * x51) + ((x21 * x49) + ((x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + ((x29 * x41) + ((uint64_t)(x31 * x39) + (x30 * x37)))))))))))))));
-{ uint64_t x74 = (((uint64_t)x5 * x33) + (0x3 * (((uint64_t)0x2 * (x7 * x58)) + (((uint64_t)0x2 * (x9 * x59)) + (((uint64_t)0x2 * (x11 * x57)) + (((uint64_t)0x2 * (x13 * x55)) + (((uint64_t)x15 * x53) + (((uint64_t)0x2 * (x17 * x51)) + (((uint64_t)0x2 * (x19 * x49)) + (((uint64_t)0x2 * (x21 * x47)) + (((uint64_t)0x2 * (x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)0x2 * (x27 * x41)) + (((uint64_t)0x2 * (x29 * x39)) + (((uint64_t)0x2 * (x31 * x37)) + ((uint64_t)0x2 * (x30 * x35)))))))))))))))));
-{ uint32_t x75 = (uint32_t) (x74 >> 0xf);
-{ uint32_t x76 = ((uint32_t)x74 & 0x7fff);
-{ uint64_t x77 = (x75 + x73);
-{ uint32_t x78 = (uint32_t) (x77 >> 0xe);
-{ uint32_t x79 = ((uint32_t)x77 & 0x3fff);
-{ uint64_t x80 = (x78 + x72);
-{ uint32_t x81 = (uint32_t) (x80 >> 0xe);
-{ uint32_t x82 = ((uint32_t)x80 & 0x3fff);
-{ uint64_t x83 = (x81 + x71);
-{ uint32_t x84 = (uint32_t) (x83 >> 0xe);
-{ uint32_t x85 = ((uint32_t)x83 & 0x3fff);
-{ uint64_t x86 = (x84 + x70);
-{ uint32_t x87 = (uint32_t) (x86 >> 0xe);
-{ uint32_t x88 = ((uint32_t)x86 & 0x3fff);
-{ uint64_t x89 = (x87 + x69);
-{ uint32_t x90 = (uint32_t) (x89 >> 0xf);
-{ uint32_t x91 = ((uint32_t)x89 & 0x7fff);
-{ uint64_t x92 = (x90 + x68);
-{ uint32_t x93 = (uint32_t) (x92 >> 0xe);
-{ uint32_t x94 = ((uint32_t)x92 & 0x3fff);
-{ uint64_t x95 = (x93 + x67);
-{ uint32_t x96 = (uint32_t) (x95 >> 0xe);
-{ uint32_t x97 = ((uint32_t)x95 & 0x3fff);
-{ uint64_t x98 = (x96 + x66);
-{ uint32_t x99 = (uint32_t) (x98 >> 0xe);
-{ uint32_t x100 = ((uint32_t)x98 & 0x3fff);
-{ uint64_t x101 = (x99 + x65);
-{ uint32_t x102 = (uint32_t) (x101 >> 0xe);
-{ uint32_t x103 = ((uint32_t)x101 & 0x3fff);
-{ uint64_t x104 = (x102 + x64);
-{ uint32_t x105 = (uint32_t) (x104 >> 0xf);
-{ uint32_t x106 = ((uint32_t)x104 & 0x7fff);
-{ uint64_t x107 = (x105 + x63);
-{ uint32_t x108 = (uint32_t) (x107 >> 0xe);
-{ uint32_t x109 = ((uint32_t)x107 & 0x3fff);
-{ uint64_t x110 = (x108 + x62);
-{ uint32_t x111 = (uint32_t) (x110 >> 0xe);
-{ uint32_t x112 = ((uint32_t)x110 & 0x3fff);
-{ uint64_t x113 = (x111 + x61);
-{ uint32_t x114 = (uint32_t) (x113 >> 0xe);
-{ uint32_t x115 = ((uint32_t)x113 & 0x3fff);
-{ uint64_t x116 = (x114 + x60);
-{ uint32_t x117 = (uint32_t) (x116 >> 0xe);
-{ uint32_t x118 = ((uint32_t)x116 & 0x3fff);
-{ uint32_t x119 = (x76 + (0x3 * x117));
-{ uint32_t x120 = (x119 >> 0xf);
-{ uint32_t x121 = (x119 & 0x7fff);
-{ uint32_t x122 = (x120 + x79);
-{ uint32_t x123 = (x122 >> 0xe);
-{ uint32_t x124 = (x122 & 0x3fff);
-out[0] = x118;
-out[1] = x115;
-out[2] = x112;
-out[3] = x109;
-out[4] = x106;
-out[5] = x103;
-out[6] = x100;
-out[7] = x97;
-out[8] = x94;
-out[9] = x91;
-out[10] = x88;
-out[11] = x85;
-out[12] = x123 + x82;
-out[13] = x124;
-out[14] = x121;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void femul(uint32_t out[15], const uint32_t in1[15], const uint32_t in2[15]) {
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x58 = in2[14];
+ { const uint32_t x59 = in2[13];
+ { const uint32_t x57 = in2[12];
+ { const uint32_t x55 = in2[11];
+ { const uint32_t x53 = in2[10];
+ { const uint32_t x51 = in2[9];
+ { const uint32_t x49 = in2[8];
+ { const uint32_t x47 = in2[7];
+ { const uint32_t x45 = in2[6];
+ { const uint32_t x43 = in2[5];
+ { const uint32_t x41 = in2[4];
+ { const uint32_t x39 = in2[3];
+ { const uint32_t x37 = in2[2];
+ { const uint32_t x35 = in2[1];
+ { const uint32_t x33 = in2[0];
+ { uint64_t x60 = (((uint64_t)x5 * x58) + (((uint64_t)0x2 * (x7 * x59)) + (((uint64_t)0x2 * (x9 * x57)) + (((uint64_t)0x2 * (x11 * x55)) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)0x2 * (x17 * x49)) + (((uint64_t)0x2 * (x19 * x47)) + (((uint64_t)0x2 * (x21 * x45)) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)0x2 * (x27 * x39)) + (((uint64_t)0x2 * (x29 * x37)) + (((uint64_t)0x2 * (x31 * x35)) + ((uint64_t)x30 * x33)))))))))))))));
+ { uint64_t x61 = ((((uint64_t)x5 * x59) + (((uint64_t)0x2 * (x7 * x57)) + (((uint64_t)0x2 * (x9 * x55)) + (((uint64_t)x11 * x53) + ((x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)0x2 * (x17 * x47)) + (((uint64_t)0x2 * (x19 * x45)) + (((uint64_t)x21 * x43) + ((x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)0x2 * (x27 * x37)) + (((uint64_t)0x2 * (x29 * x35)) + ((uint64_t)x31 * x33)))))))))))))) + ((uint64_t)0x3 * (x30 * x58)));
+ { uint64_t x62 = ((((uint64_t)x5 * x57) + (((uint64_t)0x2 * (x7 * x55)) + (((uint64_t)x9 * x53) + ((x11 * x51) + ((x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)0x2 * (x17 * x45)) + (((uint64_t)x19 * x43) + ((x21 * x41) + ((x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)0x2 * (x27 * x35)) + ((uint64_t)x29 * x33))))))))))))) + (0x3 * ((uint64_t)(x31 * x58) + (x30 * x59))));
+ { uint64_t x63 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + ((x9 * x51) + ((x11 * x49) + ((x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((x19 * x41) + ((x21 * x39) + ((x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x27 * x33)))))))))))) + (0x3 * ((x29 * x58) + ((uint64_t)(x31 * x59) + (x30 * x57)))));
+ { uint64_t x64 = ((((uint64_t)x5 * x53) + (((uint64_t)0x2 * (x7 * x51)) + (((uint64_t)0x2 * (x9 * x49)) + (((uint64_t)0x2 * (x11 * x47)) + (((uint64_t)0x2 * (x13 * x45)) + (((uint64_t)x15 * x43) + (((uint64_t)0x2 * (x17 * x41)) + (((uint64_t)0x2 * (x19 * x39)) + (((uint64_t)0x2 * (x21 * x37)) + (((uint64_t)0x2 * (x23 * x35)) + ((uint64_t)x25 * x33))))))))))) + (0x3 * (((uint64_t)0x2 * (x27 * x58)) + (((uint64_t)0x2 * (x29 * x59)) + (((uint64_t)0x2 * (x31 * x57)) + ((uint64_t)0x2 * (x30 * x55)))))));
+ { uint64_t x65 = ((((uint64_t)x5 * x51) + (((uint64_t)0x2 * (x7 * x49)) + (((uint64_t)0x2 * (x9 * x47)) + (((uint64_t)0x2 * (x11 * x45)) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)0x2 * (x17 * x39)) + (((uint64_t)0x2 * (x19 * x37)) + (((uint64_t)0x2 * (x21 * x35)) + ((uint64_t)x23 * x33)))))))))) + (0x3 * (((uint64_t)x25 * x58) + (((uint64_t)0x2 * (x27 * x59)) + (((uint64_t)0x2 * (x29 * x57)) + (((uint64_t)0x2 * (x31 * x55)) + ((uint64_t)x30 * x53)))))));
+ { uint64_t x66 = ((((uint64_t)x5 * x49) + (((uint64_t)0x2 * (x7 * x47)) + (((uint64_t)0x2 * (x9 * x45)) + (((uint64_t)x11 * x43) + ((x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)0x2 * (x17 * x37)) + (((uint64_t)0x2 * (x19 * x35)) + ((uint64_t)x21 * x33))))))))) + (0x3 * ((x23 * x58) + (((uint64_t)x25 * x59) + (((uint64_t)0x2 * (x27 * x57)) + (((uint64_t)0x2 * (x29 * x55)) + (((uint64_t)x31 * x53) + (x30 * x51))))))));
+ { uint64_t x67 = ((((uint64_t)x5 * x47) + (((uint64_t)0x2 * (x7 * x45)) + (((uint64_t)x9 * x43) + ((x11 * x41) + ((x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)0x2 * (x17 * x35)) + ((uint64_t)x19 * x33)))))))) + (0x3 * ((x21 * x58) + ((x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)0x2 * (x27 * x55)) + (((uint64_t)x29 * x53) + ((uint64_t)(x31 * x51) + (x30 * x49)))))))));
+ { uint64_t x68 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + ((x9 * x41) + ((x11 * x39) + ((x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33))))))) + (0x3 * ((x19 * x58) + ((x21 * x59) + ((x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((x29 * x51) + ((uint64_t)(x31 * x49) + (x30 * x47))))))))));
+ { uint64_t x69 = ((((uint64_t)x5 * x43) + (((uint64_t)0x2 * (x7 * x41)) + (((uint64_t)0x2 * (x9 * x39)) + (((uint64_t)0x2 * (x11 * x37)) + (((uint64_t)0x2 * (x13 * x35)) + ((uint64_t)x15 * x33)))))) + (0x3 * (((uint64_t)0x2 * (x17 * x58)) + (((uint64_t)0x2 * (x19 * x59)) + (((uint64_t)0x2 * (x21 * x57)) + (((uint64_t)0x2 * (x23 * x55)) + (((uint64_t)x25 * x53) + (((uint64_t)0x2 * (x27 * x51)) + (((uint64_t)0x2 * (x29 * x49)) + (((uint64_t)0x2 * (x31 * x47)) + ((uint64_t)0x2 * (x30 * x45))))))))))));
+ { uint64_t x70 = ((((uint64_t)x5 * x41) + (((uint64_t)0x2 * (x7 * x39)) + (((uint64_t)0x2 * (x9 * x37)) + (((uint64_t)0x2 * (x11 * x35)) + ((uint64_t)x13 * x33))))) + (0x3 * (((uint64_t)x15 * x58) + (((uint64_t)0x2 * (x17 * x59)) + (((uint64_t)0x2 * (x19 * x57)) + (((uint64_t)0x2 * (x21 * x55)) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)0x2 * (x27 * x49)) + (((uint64_t)0x2 * (x29 * x47)) + (((uint64_t)0x2 * (x31 * x45)) + ((uint64_t)x30 * x43))))))))))));
+ { uint64_t x71 = ((((uint64_t)x5 * x39) + (((uint64_t)0x2 * (x7 * x37)) + (((uint64_t)0x2 * (x9 * x35)) + ((uint64_t)x11 * x33)))) + (0x3 * ((x13 * x58) + (((uint64_t)x15 * x59) + (((uint64_t)0x2 * (x17 * x57)) + (((uint64_t)0x2 * (x19 * x55)) + (((uint64_t)x21 * x53) + ((x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)0x2 * (x27 * x47)) + (((uint64_t)0x2 * (x29 * x45)) + (((uint64_t)x31 * x43) + (x30 * x41)))))))))))));
+ { uint64_t x72 = ((((uint64_t)x5 * x37) + (((uint64_t)0x2 * (x7 * x35)) + ((uint64_t)x9 * x33))) + (0x3 * ((x11 * x58) + ((x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)0x2 * (x17 * x55)) + (((uint64_t)x19 * x53) + ((x21 * x51) + ((x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)0x2 * (x27 * x45)) + (((uint64_t)x29 * x43) + ((uint64_t)(x31 * x41) + (x30 * x39))))))))))))));
+ { uint64_t x73 = ((((uint64_t)x5 * x35) + ((uint64_t)x7 * x33)) + (0x3 * ((x9 * x58) + ((x11 * x59) + ((x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((x19 * x51) + ((x21 * x49) + ((x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + ((x29 * x41) + ((uint64_t)(x31 * x39) + (x30 * x37)))))))))))))));
+ { uint64_t x74 = (((uint64_t)x5 * x33) + (0x3 * (((uint64_t)0x2 * (x7 * x58)) + (((uint64_t)0x2 * (x9 * x59)) + (((uint64_t)0x2 * (x11 * x57)) + (((uint64_t)0x2 * (x13 * x55)) + (((uint64_t)x15 * x53) + (((uint64_t)0x2 * (x17 * x51)) + (((uint64_t)0x2 * (x19 * x49)) + (((uint64_t)0x2 * (x21 * x47)) + (((uint64_t)0x2 * (x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)0x2 * (x27 * x41)) + (((uint64_t)0x2 * (x29 * x39)) + (((uint64_t)0x2 * (x31 * x37)) + ((uint64_t)0x2 * (x30 * x35)))))))))))))))));
+ { uint32_t x75 = (uint32_t) (x74 >> 0xf);
+ { uint32_t x76 = ((uint32_t)x74 & 0x7fff);
+ { uint64_t x77 = (x75 + x73);
+ { uint32_t x78 = (uint32_t) (x77 >> 0xe);
+ { uint32_t x79 = ((uint32_t)x77 & 0x3fff);
+ { uint64_t x80 = (x78 + x72);
+ { uint32_t x81 = (uint32_t) (x80 >> 0xe);
+ { uint32_t x82 = ((uint32_t)x80 & 0x3fff);
+ { uint64_t x83 = (x81 + x71);
+ { uint32_t x84 = (uint32_t) (x83 >> 0xe);
+ { uint32_t x85 = ((uint32_t)x83 & 0x3fff);
+ { uint64_t x86 = (x84 + x70);
+ { uint32_t x87 = (uint32_t) (x86 >> 0xe);
+ { uint32_t x88 = ((uint32_t)x86 & 0x3fff);
+ { uint64_t x89 = (x87 + x69);
+ { uint32_t x90 = (uint32_t) (x89 >> 0xf);
+ { uint32_t x91 = ((uint32_t)x89 & 0x7fff);
+ { uint64_t x92 = (x90 + x68);
+ { uint32_t x93 = (uint32_t) (x92 >> 0xe);
+ { uint32_t x94 = ((uint32_t)x92 & 0x3fff);
+ { uint64_t x95 = (x93 + x67);
+ { uint32_t x96 = (uint32_t) (x95 >> 0xe);
+ { uint32_t x97 = ((uint32_t)x95 & 0x3fff);
+ { uint64_t x98 = (x96 + x66);
+ { uint32_t x99 = (uint32_t) (x98 >> 0xe);
+ { uint32_t x100 = ((uint32_t)x98 & 0x3fff);
+ { uint64_t x101 = (x99 + x65);
+ { uint32_t x102 = (uint32_t) (x101 >> 0xe);
+ { uint32_t x103 = ((uint32_t)x101 & 0x3fff);
+ { uint64_t x104 = (x102 + x64);
+ { uint32_t x105 = (uint32_t) (x104 >> 0xf);
+ { uint32_t x106 = ((uint32_t)x104 & 0x7fff);
+ { uint64_t x107 = (x105 + x63);
+ { uint32_t x108 = (uint32_t) (x107 >> 0xe);
+ { uint32_t x109 = ((uint32_t)x107 & 0x3fff);
+ { uint64_t x110 = (x108 + x62);
+ { uint32_t x111 = (uint32_t) (x110 >> 0xe);
+ { uint32_t x112 = ((uint32_t)x110 & 0x3fff);
+ { uint64_t x113 = (x111 + x61);
+ { uint32_t x114 = (uint32_t) (x113 >> 0xe);
+ { uint32_t x115 = ((uint32_t)x113 & 0x3fff);
+ { uint64_t x116 = (x114 + x60);
+ { uint32_t x117 = (uint32_t) (x116 >> 0xe);
+ { uint32_t x118 = ((uint32_t)x116 & 0x3fff);
+ { uint32_t x119 = (x76 + (0x3 * x117));
+ { uint32_t x120 = (x119 >> 0xf);
+ { uint32_t x121 = (x119 & 0x7fff);
+ { uint32_t x122 = (x120 + x79);
+ { uint32_t x123 = (x122 >> 0xe);
+ { uint32_t x124 = (x122 & 0x3fff);
+ out[0] = x121;
+ out[1] = x124;
+ out[2] = (x123 + x82);
+ out[3] = x85;
+ out[4] = x88;
+ out[5] = x91;
+ out[6] = x94;
+ out[7] = x97;
+ out[8] = x100;
+ out[9] = x103;
+ out[10] = x106;
+ out[11] = x109;
+ out[12] = x112;
+ out[13] = x115;
+ out[14] = x118;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e213m3/fesquare.c b/src/Specific/solinas32_2e213m3/fesquare.c
index 70a9e562d..0fd4c1fe4 100644
--- a/src/Specific/solinas32_2e213m3/fesquare.c
+++ b/src/Specific/solinas32_2e213m3/fesquare.c
@@ -1,101 +1,98 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x29 = (((uint64_t)x2 * x27) + (((uint64_t)0x2 * (x4 * x28)) + (((uint64_t)0x2 * (x6 * x26)) + (((uint64_t)0x2 * (x8 * x24)) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)0x2 * (x14 * x18)) + (((uint64_t)0x2 * (x16 * x16)) + (((uint64_t)0x2 * (x18 * x14)) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)0x2 * (x24 * x8)) + (((uint64_t)0x2 * (x26 * x6)) + (((uint64_t)0x2 * (x28 * x4)) + ((uint64_t)x27 * x2)))))))))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x28) + (((uint64_t)0x2 * (x4 * x26)) + (((uint64_t)0x2 * (x6 * x24)) + (((uint64_t)x8 * x22) + ((x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)0x2 * (x14 * x16)) + (((uint64_t)0x2 * (x16 * x14)) + (((uint64_t)x18 * x12) + ((x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)0x2 * (x24 * x6)) + (((uint64_t)0x2 * (x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + ((uint64_t)0x3 * (x27 * x27)));
-{ uint64_t x31 = ((((uint64_t)x2 * x26) + (((uint64_t)0x2 * (x4 * x24)) + (((uint64_t)x6 * x22) + ((x8 * x20) + ((x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)0x2 * (x14 * x14)) + (((uint64_t)x16 * x12) + ((x18 * x10) + ((x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)0x2 * (x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x3 * ((uint64_t)(x28 * x27) + (x27 * x28))));
-{ uint64_t x32 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + ((x6 * x20) + ((x8 * x18) + ((x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((x16 * x10) + ((x18 * x8) + ((x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x3 * ((x26 * x27) + ((uint64_t)(x28 * x28) + (x27 * x26)))));
-{ uint64_t x33 = ((((uint64_t)x2 * x22) + (((uint64_t)0x2 * (x4 * x20)) + (((uint64_t)0x2 * (x6 * x18)) + (((uint64_t)0x2 * (x8 * x16)) + (((uint64_t)0x2 * (x10 * x14)) + (((uint64_t)x12 * x12) + (((uint64_t)0x2 * (x14 * x10)) + (((uint64_t)0x2 * (x16 * x8)) + (((uint64_t)0x2 * (x18 * x6)) + (((uint64_t)0x2 * (x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x3 * (((uint64_t)0x2 * (x24 * x27)) + (((uint64_t)0x2 * (x26 * x28)) + (((uint64_t)0x2 * (x28 * x26)) + ((uint64_t)0x2 * (x27 * x24)))))));
-{ uint64_t x34 = ((((uint64_t)x2 * x20) + (((uint64_t)0x2 * (x4 * x18)) + (((uint64_t)0x2 * (x6 * x16)) + (((uint64_t)0x2 * (x8 * x14)) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)0x2 * (x14 * x8)) + (((uint64_t)0x2 * (x16 * x6)) + (((uint64_t)0x2 * (x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x27) + (((uint64_t)0x2 * (x24 * x28)) + (((uint64_t)0x2 * (x26 * x26)) + (((uint64_t)0x2 * (x28 * x24)) + ((uint64_t)x27 * x22)))))));
-{ uint64_t x35 = ((((uint64_t)x2 * x18) + (((uint64_t)0x2 * (x4 * x16)) + (((uint64_t)0x2 * (x6 * x14)) + (((uint64_t)x8 * x12) + ((x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)0x2 * (x14 * x6)) + (((uint64_t)0x2 * (x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * ((x20 * x27) + (((uint64_t)x22 * x28) + (((uint64_t)0x2 * (x24 * x26)) + (((uint64_t)0x2 * (x26 * x24)) + (((uint64_t)x28 * x22) + (x27 * x20))))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x16) + (((uint64_t)0x2 * (x4 * x14)) + (((uint64_t)x6 * x12) + ((x8 * x10) + ((x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)0x2 * (x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x3 * ((x18 * x27) + ((x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)0x2 * (x24 * x24)) + (((uint64_t)x26 * x22) + ((uint64_t)(x28 * x20) + (x27 * x18)))))))));
-{ uint64_t x37 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + ((x6 * x10) + ((x8 * x8) + ((x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x3 * ((x16 * x27) + ((x18 * x28) + ((x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + ((x26 * x20) + ((uint64_t)(x28 * x18) + (x27 * x16))))))))));
-{ uint64_t x38 = ((((uint64_t)x2 * x12) + (((uint64_t)0x2 * (x4 * x10)) + (((uint64_t)0x2 * (x6 * x8)) + (((uint64_t)0x2 * (x8 * x6)) + (((uint64_t)0x2 * (x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)0x2 * (x14 * x27)) + (((uint64_t)0x2 * (x16 * x28)) + (((uint64_t)0x2 * (x18 * x26)) + (((uint64_t)0x2 * (x20 * x24)) + (((uint64_t)x22 * x22) + (((uint64_t)0x2 * (x24 * x20)) + (((uint64_t)0x2 * (x26 * x18)) + (((uint64_t)0x2 * (x28 * x16)) + ((uint64_t)0x2 * (x27 * x14))))))))))));
-{ uint64_t x39 = ((((uint64_t)x2 * x10) + (((uint64_t)0x2 * (x4 * x8)) + (((uint64_t)0x2 * (x6 * x6)) + (((uint64_t)0x2 * (x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x27) + (((uint64_t)0x2 * (x14 * x28)) + (((uint64_t)0x2 * (x16 * x26)) + (((uint64_t)0x2 * (x18 * x24)) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)0x2 * (x24 * x18)) + (((uint64_t)0x2 * (x26 * x16)) + (((uint64_t)0x2 * (x28 * x14)) + ((uint64_t)x27 * x12))))))))))));
-{ uint64_t x40 = ((((uint64_t)x2 * x8) + (((uint64_t)0x2 * (x4 * x6)) + (((uint64_t)0x2 * (x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * ((x10 * x27) + (((uint64_t)x12 * x28) + (((uint64_t)0x2 * (x14 * x26)) + (((uint64_t)0x2 * (x16 * x24)) + (((uint64_t)x18 * x22) + ((x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)0x2 * (x24 * x16)) + (((uint64_t)0x2 * (x26 * x14)) + (((uint64_t)x28 * x12) + (x27 * x10)))))))))))));
-{ uint64_t x41 = ((((uint64_t)x2 * x6) + (((uint64_t)0x2 * (x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * ((x8 * x27) + ((x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)0x2 * (x14 * x24)) + (((uint64_t)x16 * x22) + ((x18 * x20) + ((x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)0x2 * (x24 * x14)) + (((uint64_t)x26 * x12) + ((uint64_t)(x28 * x10) + (x27 * x8))))))))))))));
-{ uint64_t x42 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * ((x6 * x27) + ((x8 * x28) + ((x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((x16 * x20) + ((x18 * x18) + ((x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + ((x26 * x10) + ((uint64_t)(x28 * x8) + (x27 * x6)))))))))))))));
-{ uint64_t x43 = (((uint64_t)x2 * x2) + (0x3 * (((uint64_t)0x2 * (x4 * x27)) + (((uint64_t)0x2 * (x6 * x28)) + (((uint64_t)0x2 * (x8 * x26)) + (((uint64_t)0x2 * (x10 * x24)) + (((uint64_t)x12 * x22) + (((uint64_t)0x2 * (x14 * x20)) + (((uint64_t)0x2 * (x16 * x18)) + (((uint64_t)0x2 * (x18 * x16)) + (((uint64_t)0x2 * (x20 * x14)) + (((uint64_t)x22 * x12) + (((uint64_t)0x2 * (x24 * x10)) + (((uint64_t)0x2 * (x26 * x8)) + (((uint64_t)0x2 * (x28 * x6)) + ((uint64_t)0x2 * (x27 * x4)))))))))))))))));
-{ uint32_t x44 = (uint32_t) (x43 >> 0xf);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7fff);
-{ uint64_t x46 = (x44 + x42);
-{ uint32_t x47 = (uint32_t) (x46 >> 0xe);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3fff);
-{ uint64_t x49 = (x47 + x41);
-{ uint32_t x50 = (uint32_t) (x49 >> 0xe);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3fff);
-{ uint64_t x52 = (x50 + x40);
-{ uint32_t x53 = (uint32_t) (x52 >> 0xe);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fff);
-{ uint64_t x55 = (x53 + x39);
-{ uint32_t x56 = (uint32_t) (x55 >> 0xe);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3fff);
-{ uint64_t x58 = (x56 + x38);
-{ uint32_t x59 = (uint32_t) (x58 >> 0xf);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7fff);
-{ uint64_t x61 = (x59 + x37);
-{ uint32_t x62 = (uint32_t) (x61 >> 0xe);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3fff);
-{ uint64_t x64 = (x62 + x36);
-{ uint32_t x65 = (uint32_t) (x64 >> 0xe);
-{ uint32_t x66 = ((uint32_t)x64 & 0x3fff);
-{ uint64_t x67 = (x65 + x35);
-{ uint32_t x68 = (uint32_t) (x67 >> 0xe);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3fff);
-{ uint64_t x70 = (x68 + x34);
-{ uint32_t x71 = (uint32_t) (x70 >> 0xe);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3fff);
-{ uint64_t x73 = (x71 + x33);
-{ uint32_t x74 = (uint32_t) (x73 >> 0xf);
-{ uint32_t x75 = ((uint32_t)x73 & 0x7fff);
-{ uint64_t x76 = (x74 + x32);
-{ uint32_t x77 = (uint32_t) (x76 >> 0xe);
-{ uint32_t x78 = ((uint32_t)x76 & 0x3fff);
-{ uint64_t x79 = (x77 + x31);
-{ uint32_t x80 = (uint32_t) (x79 >> 0xe);
-{ uint32_t x81 = ((uint32_t)x79 & 0x3fff);
-{ uint64_t x82 = (x80 + x30);
-{ uint32_t x83 = (uint32_t) (x82 >> 0xe);
-{ uint32_t x84 = ((uint32_t)x82 & 0x3fff);
-{ uint64_t x85 = (x83 + x29);
-{ uint32_t x86 = (uint32_t) (x85 >> 0xe);
-{ uint32_t x87 = ((uint32_t)x85 & 0x3fff);
-{ uint32_t x88 = (x45 + (0x3 * x86));
-{ uint32_t x89 = (x88 >> 0xf);
-{ uint32_t x90 = (x88 & 0x7fff);
-{ uint32_t x91 = (x89 + x48);
-{ uint32_t x92 = (x91 >> 0xe);
-{ uint32_t x93 = (x91 & 0x3fff);
-out[0] = x87;
-out[1] = x84;
-out[2] = x81;
-out[3] = x78;
-out[4] = x75;
-out[5] = x72;
-out[6] = x69;
-out[7] = x66;
-out[8] = x63;
-out[9] = x60;
-out[10] = x57;
-out[11] = x54;
-out[12] = x92 + x51;
-out[13] = x93;
-out[14] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void fesquare(uint32_t out[15], const uint32_t in1[15]) { // Multiplies the 15-limb input in1 by itself (every product below pairs two limbs of in1) and writes 15 limbs to out.
+ { const uint32_t x27 = in1[14]; // x27 is limb 14; the loads below walk down to x2 = limb 0 (note x28 is limb 13, x26 is limb 12).
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; // x29..x43 below are 64-bit column sums: x43 gathers the products for limb 0, x29 those for limb 14.
+ { uint64_t x29 = (((uint64_t)x2 * x27) + (((uint64_t)0x2 * (x4 * x28)) + (((uint64_t)0x2 * (x6 * x26)) + (((uint64_t)0x2 * (x8 * x24)) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)0x2 * (x14 * x18)) + (((uint64_t)0x2 * (x16 * x16)) + (((uint64_t)0x2 * (x18 * x14)) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)0x2 * (x24 * x8)) + (((uint64_t)0x2 * (x26 * x6)) + (((uint64_t)0x2 * (x28 * x4)) + ((uint64_t)x27 * x2))))))))))))))); // Column 14: products whose limb indices sum to 14; no 0x3 term. NOTE(review): the 0x2 factors presumably compensate for the mixed 15/14-bit limb widths seen in the masks further down - confirm against the generator.
+ { uint64_t x30 = ((((uint64_t)x2 * x28) + (((uint64_t)0x2 * (x4 * x26)) + (((uint64_t)0x2 * (x6 * x24)) + (((uint64_t)x8 * x22) + ((x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)0x2 * (x14 * x16)) + (((uint64_t)0x2 * (x16 * x14)) + (((uint64_t)x18 * x12) + ((x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)0x2 * (x24 * x6)) + (((uint64_t)0x2 * (x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + ((uint64_t)0x3 * (x27 * x27))); // Column 13: index-sum-13 products plus 0x3 * (limb14 * limb14). NOTE(review): the 0x3 multiplier on wrapped products presumably reflects reduction modulo 2^213 - 3 (directory solinas32_2e213m3) - confirm.
+ { uint64_t x31 = ((((uint64_t)x2 * x26) + (((uint64_t)0x2 * (x4 * x24)) + (((uint64_t)x6 * x22) + ((x8 * x20) + ((x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)0x2 * (x14 * x14)) + (((uint64_t)x16 * x12) + ((x18 * x10) + ((x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)0x2 * (x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x3 * ((uint64_t)(x28 * x27) + (x27 * x28)))); // NOTE(review): products written without a cast on an operand, e.g. (x28 * x27), are multiplied in uint32_t and only then widened; this assumes input limbs are small enough that the 32-bit product cannot wrap - the bound is not visible here.
+ { uint64_t x32 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + ((x6 * x20) + ((x8 * x18) + ((x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((x16 * x10) + ((x18 * x8) + ((x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x3 * ((x26 * x27) + ((uint64_t)(x28 * x28) + (x27 * x26)))));
+ { uint64_t x33 = ((((uint64_t)x2 * x22) + (((uint64_t)0x2 * (x4 * x20)) + (((uint64_t)0x2 * (x6 * x18)) + (((uint64_t)0x2 * (x8 * x16)) + (((uint64_t)0x2 * (x10 * x14)) + (((uint64_t)x12 * x12) + (((uint64_t)0x2 * (x14 * x10)) + (((uint64_t)0x2 * (x16 * x8)) + (((uint64_t)0x2 * (x18 * x6)) + (((uint64_t)0x2 * (x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x3 * (((uint64_t)0x2 * (x24 * x27)) + (((uint64_t)0x2 * (x26 * x28)) + (((uint64_t)0x2 * (x28 * x26)) + ((uint64_t)0x2 * (x27 * x24)))))));
+ { uint64_t x34 = ((((uint64_t)x2 * x20) + (((uint64_t)0x2 * (x4 * x18)) + (((uint64_t)0x2 * (x6 * x16)) + (((uint64_t)0x2 * (x8 * x14)) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)0x2 * (x14 * x8)) + (((uint64_t)0x2 * (x16 * x6)) + (((uint64_t)0x2 * (x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x27) + (((uint64_t)0x2 * (x24 * x28)) + (((uint64_t)0x2 * (x26 * x26)) + (((uint64_t)0x2 * (x28 * x24)) + ((uint64_t)x27 * x22)))))));
+ { uint64_t x35 = ((((uint64_t)x2 * x18) + (((uint64_t)0x2 * (x4 * x16)) + (((uint64_t)0x2 * (x6 * x14)) + (((uint64_t)x8 * x12) + ((x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)0x2 * (x14 * x6)) + (((uint64_t)0x2 * (x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * ((x20 * x27) + (((uint64_t)x22 * x28) + (((uint64_t)0x2 * (x24 * x26)) + (((uint64_t)0x2 * (x26 * x24)) + (((uint64_t)x28 * x22) + (x27 * x20))))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x16) + (((uint64_t)0x2 * (x4 * x14)) + (((uint64_t)x6 * x12) + ((x8 * x10) + ((x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)0x2 * (x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x3 * ((x18 * x27) + ((x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)0x2 * (x24 * x24)) + (((uint64_t)x26 * x22) + ((uint64_t)(x28 * x20) + (x27 * x18)))))))));
+ { uint64_t x37 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + ((x6 * x10) + ((x8 * x8) + ((x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x3 * ((x16 * x27) + ((x18 * x28) + ((x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + ((x26 * x20) + ((uint64_t)(x28 * x18) + (x27 * x16))))))))));
+ { uint64_t x38 = ((((uint64_t)x2 * x12) + (((uint64_t)0x2 * (x4 * x10)) + (((uint64_t)0x2 * (x6 * x8)) + (((uint64_t)0x2 * (x8 * x6)) + (((uint64_t)0x2 * (x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)0x2 * (x14 * x27)) + (((uint64_t)0x2 * (x16 * x28)) + (((uint64_t)0x2 * (x18 * x26)) + (((uint64_t)0x2 * (x20 * x24)) + (((uint64_t)x22 * x22) + (((uint64_t)0x2 * (x24 * x20)) + (((uint64_t)0x2 * (x26 * x18)) + (((uint64_t)0x2 * (x28 * x16)) + ((uint64_t)0x2 * (x27 * x14))))))))))));
+ { uint64_t x39 = ((((uint64_t)x2 * x10) + (((uint64_t)0x2 * (x4 * x8)) + (((uint64_t)0x2 * (x6 * x6)) + (((uint64_t)0x2 * (x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x27) + (((uint64_t)0x2 * (x14 * x28)) + (((uint64_t)0x2 * (x16 * x26)) + (((uint64_t)0x2 * (x18 * x24)) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)0x2 * (x24 * x18)) + (((uint64_t)0x2 * (x26 * x16)) + (((uint64_t)0x2 * (x28 * x14)) + ((uint64_t)x27 * x12))))))))))));
+ { uint64_t x40 = ((((uint64_t)x2 * x8) + (((uint64_t)0x2 * (x4 * x6)) + (((uint64_t)0x2 * (x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * ((x10 * x27) + (((uint64_t)x12 * x28) + (((uint64_t)0x2 * (x14 * x26)) + (((uint64_t)0x2 * (x16 * x24)) + (((uint64_t)x18 * x22) + ((x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)0x2 * (x24 * x16)) + (((uint64_t)0x2 * (x26 * x14)) + (((uint64_t)x28 * x12) + (x27 * x10)))))))))))));
+ { uint64_t x41 = ((((uint64_t)x2 * x6) + (((uint64_t)0x2 * (x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * ((x8 * x27) + ((x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)0x2 * (x14 * x24)) + (((uint64_t)x16 * x22) + ((x18 * x20) + ((x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)0x2 * (x24 * x14)) + (((uint64_t)x26 * x12) + ((uint64_t)(x28 * x10) + (x27 * x8))))))))))))));
+ { uint64_t x42 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * ((x6 * x27) + ((x8 * x28) + ((x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((x16 * x20) + ((x18 * x18) + ((x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + ((x26 * x10) + ((uint64_t)(x28 * x8) + (x27 * x6)))))))))))))));
+ { uint64_t x43 = (((uint64_t)x2 * x2) + (0x3 * (((uint64_t)0x2 * (x4 * x27)) + (((uint64_t)0x2 * (x6 * x28)) + (((uint64_t)0x2 * (x8 * x26)) + (((uint64_t)0x2 * (x10 * x24)) + (((uint64_t)x12 * x22) + (((uint64_t)0x2 * (x14 * x20)) + (((uint64_t)0x2 * (x16 * x18)) + (((uint64_t)0x2 * (x18 * x16)) + (((uint64_t)0x2 * (x20 * x14)) + (((uint64_t)x22 * x12) + (((uint64_t)0x2 * (x24 * x10)) + (((uint64_t)0x2 * (x26 * x8)) + (((uint64_t)0x2 * (x28 * x6)) + ((uint64_t)0x2 * (x27 * x4))))))))))))))))); // Column 0: x2 * x2 plus 0x3 times the products whose limb indices sum to 15.
+ { uint32_t x44 = (uint32_t) (x43 >> 0xf); // Carry chain starts here: each column is split into high bits (carry, added to the next column) and low bits (the limb).
+ { uint32_t x45 = ((uint32_t)x43 & 0x7fff); // Limb 0 keeps 15 bits.
+ { uint64_t x46 = (x44 + x42);
+ { uint32_t x47 = (uint32_t) (x46 >> 0xe);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3fff); // Limb 1 keeps 14 bits, as do all limbs except 0, 5 and 10.
+ { uint64_t x49 = (x47 + x41);
+ { uint32_t x50 = (uint32_t) (x49 >> 0xe);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3fff);
+ { uint64_t x52 = (x50 + x40);
+ { uint32_t x53 = (uint32_t) (x52 >> 0xe);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fff);
+ { uint64_t x55 = (x53 + x39);
+ { uint32_t x56 = (uint32_t) (x55 >> 0xe);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3fff);
+ { uint64_t x58 = (x56 + x38);
+ { uint32_t x59 = (uint32_t) (x58 >> 0xf); // Column 5 is split at 15 bits.
+ { uint32_t x60 = ((uint32_t)x58 & 0x7fff);
+ { uint64_t x61 = (x59 + x37);
+ { uint32_t x62 = (uint32_t) (x61 >> 0xe);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3fff);
+ { uint64_t x64 = (x62 + x36);
+ { uint32_t x65 = (uint32_t) (x64 >> 0xe);
+ { uint32_t x66 = ((uint32_t)x64 & 0x3fff);
+ { uint64_t x67 = (x65 + x35);
+ { uint32_t x68 = (uint32_t) (x67 >> 0xe);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3fff);
+ { uint64_t x70 = (x68 + x34);
+ { uint32_t x71 = (uint32_t) (x70 >> 0xe);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3fff);
+ { uint64_t x73 = (x71 + x33);
+ { uint32_t x74 = (uint32_t) (x73 >> 0xf); // Column 10 is split at 15 bits.
+ { uint32_t x75 = ((uint32_t)x73 & 0x7fff);
+ { uint64_t x76 = (x74 + x32);
+ { uint32_t x77 = (uint32_t) (x76 >> 0xe);
+ { uint32_t x78 = ((uint32_t)x76 & 0x3fff);
+ { uint64_t x79 = (x77 + x31);
+ { uint32_t x80 = (uint32_t) (x79 >> 0xe);
+ { uint32_t x81 = ((uint32_t)x79 & 0x3fff);
+ { uint64_t x82 = (x80 + x30);
+ { uint32_t x83 = (uint32_t) (x82 >> 0xe);
+ { uint32_t x84 = ((uint32_t)x82 & 0x3fff);
+ { uint64_t x85 = (x83 + x29);
+ { uint32_t x86 = (uint32_t) (x85 >> 0xe); // Carry out of the top column (14).
+ { uint32_t x87 = ((uint32_t)x85 & 0x3fff);
+ { uint32_t x88 = (x45 + (0x3 * x86)); // The top carry re-enters at limb 0 multiplied by 0x3.
+ { uint32_t x89 = (x88 >> 0xf);
+ { uint32_t x90 = (x88 & 0x7fff);
+ { uint32_t x91 = (x89 + x48); // Second, short carry pass: limb 0 -> limb 1 -> limb 2.
+ { uint32_t x92 = (x91 >> 0xe);
+ { uint32_t x93 = (x91 & 0x3fff);
+ out[0] = x90;
+ out[1] = x93;
+ out[2] = (x92 + x51); // Limb 2 absorbs the last carry without being re-masked.
+ out[3] = x54;
+ out[4] = x57;
+ out[5] = x60;
+ out[6] = x63;
+ out[7] = x66;
+ out[8] = x69;
+ out[9] = x72;
+ out[10] = x75;
+ out[11] = x78;
+ out[12] = x81;
+ out[13] = x84;
+ out[14] = x87;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e213m3/freeze.c b/src/Specific/solinas32_2e213m3/freeze.c
index 595021a15..35244bbe7 100644
--- a/src/Specific/solinas32_2e213m3/freeze.c
+++ b/src/Specific/solinas32_2e213m3/freeze.c
@@ -1,25 +1,79 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x30;
-out[1] = uint8_t x31 = Op Syntax.SubWithGetBorrow 15 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffd;;
+static void freeze(uint32_t out[15], const uint32_t in1[15]) { // NOTE(review): not compilable C as shown - the "Op (Syntax....)" right-hand sides and the "uint32_t a, uint8_t b = ..." declarations are not C; this looks like an unlowered display form of the generator output - confirm before building.
+ { const uint32_t x27 = in1[14]; // Visible structure: load 15 limbs, run a 15-step SubWithGetBorrow chain against fixed constants, derive a mask from the final borrow, then run a 15-step AddWithGetCarry chain adding the masked constants back.
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; // NOTE(review): presumably a conditional subtraction that yields a canonical representative; the constants used below (0x7ffd for limb 0, 0x7fff for limbs 5 and 10, 0x3fff elsewhere) would spell 2^213 - 3 under a 15/14-bit limb layout - confirm.
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffd); // Subtract chain: first argument 0x0, then limb 0 and 0x7ffd; x31 is fed into the next step as its first argument and x30 is reused in the add chain (the leading 15 is presumably the limb width).
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x4, 0x3fff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x6, 0x3fff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x8, 0x3fff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x10, 0x3fff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x12, 0x7fff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x14, 0x3fff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x16, 0x3fff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x18, 0x3fff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x20, 0x3fff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.SubWithGetBorrow 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x22, 0x7fff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x24, 0x3fff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x26, 0x3fff);
+ { uint32_t x69, uint8_t x70 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x28, 0x3fff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.SubWithGetBorrow 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x70, Return x27, 0x3fff); // x73, the second result of the last subtraction step, selects the mask below.
+ { uint32_t x74 = (uint32_t)cmovznz(x73, 0x0, 0xffffffff); // x74 is chosen between 0x0 and 0xffffffff by x73; cmovznz is defined outside this view, so which value goes with which x73 is not visible here.
+ { uint32_t x75 = (x74 & 0x7ffd); // x75 is the limb-0 constant 0x7ffd gated by the mask.
+ { uint32_t x77, uint8_t x78 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x30, Return x75); // Add chain: first argument 0x0, then x30 and the gated constant; x78 is fed into the next step.
+ { uint32_t x79 = (x74 & 0x3fff);
+ { uint32_t x81, uint8_t x82 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x78, Return x33, Return x79);
+ { uint32_t x83 = (x74 & 0x3fff);
+ { uint32_t x85, uint8_t x86 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x36, Return x83);
+ { uint32_t x87 = (x74 & 0x3fff);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x86, Return x39, Return x87);
+ { uint32_t x91 = (x74 & 0x3fff);
+ { uint32_t x93, uint8_t x94 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x42, Return x91);
+ { uint32_t x95 = (x74 & 0x7fff);
+ { uint32_t x97, uint8_t x98 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x94, Return x45, Return x95);
+ { uint32_t x99 = (x74 & 0x3fff);
+ { uint32_t x101, uint8_t x102 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x98, Return x48, Return x99);
+ { uint32_t x103 = (x74 & 0x3fff);
+ { uint32_t x105, uint8_t x106 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x102, Return x51, Return x103);
+ { uint32_t x107 = (x74 & 0x3fff);
+ { uint32_t x109, uint8_t x110 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x106, Return x54, Return x107);
+ { uint32_t x111 = (x74 & 0x3fff);
+ { uint32_t x113, uint8_t x114 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x110, Return x57, Return x111);
+ { uint32_t x115 = (x74 & 0x7fff);
+ { uint32_t x117, uint8_t x118 = Op (Syntax.AddWithGetCarry 15 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x114, Return x60, Return x115);
+ { uint32_t x119 = (x74 & 0x3fff);
+ { uint32_t x121, uint8_t x122 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x118, Return x63, Return x119);
+ { uint32_t x123 = (x74 & 0x3fff);
+ { uint32_t x125, uint8_t x126 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x122, Return x66, Return x123);
+ { uint32_t x127 = (x74 & 0x3fff);
+ { uint32_t x129, uint8_t x130 = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x126, Return x69, Return x127);
+ { uint32_t x131 = (x74 & 0x3fff);
+ { uint32_t x133, uint8_t _ = Op (Syntax.AddWithGetCarry 14 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x130, Return x72, Return x131); // The second result of the final add step is discarded (bound to _).
+ out[0] = x77;
+ out[1] = x81;
+ out[2] = x85;
+ out[3] = x89;
+ out[4] = x93;
+ out[5] = x97;
+ out[6] = x101;
+ out[7] = x105;
+ out[8] = x109;
+ out[9] = x113;
+ out[10] = x117;
+ out[11] = x121;
+ out[12] = x125;
+ out[13] = x129;
+ out[14] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e216m2e108m1/femul.c b/src/Specific/solinas32_2e216m2e108m1/femul.c
index 2cd20ddc6..4a4e096c7 100644
--- a/src/Specific/solinas32_2e216m2e108m1/femul.c
+++ b/src/Specific/solinas32_2e216m2e108m1/femul.c
@@ -1,83 +1,81 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x32 = (((uint64_t)(x11 + x16) * (x25 + x30)) - ((uint64_t)x11 * x25));
-{ uint64_t x33 = ((((uint64_t)(x9 + x17) * (x25 + x30)) + ((uint64_t)(x11 + x16) * (x23 + x31))) - (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)));
-{ uint64_t x34 = ((((uint64_t)(x7 + x15) * (x25 + x30)) + (((uint64_t)(x9 + x17) * (x23 + x31)) + ((uint64_t)(x11 + x16) * (x21 + x29)))) - (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21))));
-{ uint64_t x35 = ((((uint64_t)(x5 + x13) * (x25 + x30)) + (((uint64_t)(x7 + x15) * (x23 + x31)) + (((uint64_t)(x9 + x17) * (x21 + x29)) + ((uint64_t)(x11 + x16) * (x19 + x27))))) - (((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))));
-{ uint64_t x36 = ((((uint64_t)(x5 + x13) * (x23 + x31)) + (((uint64_t)(x7 + x15) * (x21 + x29)) + ((uint64_t)(x9 + x17) * (x19 + x27)))) - (((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))));
-{ uint64_t x37 = ((((uint64_t)(x5 + x13) * (x21 + x29)) + ((uint64_t)(x7 + x15) * (x19 + x27))) - (((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)));
-{ uint64_t x38 = (((uint64_t)(x5 + x13) * (x19 + x27)) - ((uint64_t)x5 * x19));
-{ uint64_t x39 = (((((uint64_t)x11 * x25) + ((uint64_t)x16 * x30)) + x36) + x32);
-{ uint64_t x40 = ((((((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)) + (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))) + x37) + x33);
-{ uint64_t x41 = ((((((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21))) + (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))) + x38) + x34);
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) + (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))));
-{ uint64_t x43 = (((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((uint64_t)x17 * x27)))) + x32);
-{ uint64_t x44 = (((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27))) + x33);
-{ uint64_t x45 = ((((uint64_t)x5 * x19) + ((uint64_t)x13 * x27)) + x34);
-{ uint64_t x46 = (x42 >> 0x1b);
-{ uint32_t x47 = ((uint32_t)x42 & 0x7ffffff);
-{ uint64_t x48 = (x35 >> 0x1b);
-{ uint32_t x49 = ((uint32_t)x35 & 0x7ffffff);
-{ uint64_t x50 = ((0x8000000 * x48) + x49);
-{ uint64_t x51 = (x50 >> 0x1b);
-{ uint32_t x52 = ((uint32_t)x50 & 0x7ffffff);
-{ uint64_t x53 = ((x46 + x41) + x51);
-{ uint64_t x54 = (x53 >> 0x1b);
-{ uint32_t x55 = ((uint32_t)x53 & 0x7ffffff);
-{ uint64_t x56 = (x45 + x51);
-{ uint64_t x57 = (x56 >> 0x1b);
-{ uint32_t x58 = ((uint32_t)x56 & 0x7ffffff);
-{ uint64_t x59 = (x54 + x40);
-{ uint64_t x60 = (x59 >> 0x1b);
-{ uint32_t x61 = ((uint32_t)x59 & 0x7ffffff);
-{ uint64_t x62 = (x57 + x44);
-{ uint64_t x63 = (x62 >> 0x1b);
-{ uint32_t x64 = ((uint32_t)x62 & 0x7ffffff);
-{ uint64_t x65 = (x60 + x39);
-{ uint64_t x66 = (x65 >> 0x1b);
-{ uint32_t x67 = ((uint32_t)x65 & 0x7ffffff);
-{ uint64_t x68 = (x63 + x43);
-{ uint64_t x69 = (x68 >> 0x1b);
-{ uint32_t x70 = ((uint32_t)x68 & 0x7ffffff);
-{ uint64_t x71 = (x66 + x52);
-{ uint32_t x72 = (uint32_t) (x71 >> 0x1b);
-{ uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
-{ uint64_t x74 = (x69 + x47);
-{ uint32_t x75 = (uint32_t) (x74 >> 0x1b);
-{ uint32_t x76 = ((uint32_t)x74 & 0x7ffffff);
-{ uint64_t x77 = (((uint64_t)0x8000000 * x72) + x73);
-{ uint32_t x78 = (uint32_t) (x77 >> 0x1b);
-{ uint32_t x79 = ((uint32_t)x77 & 0x7ffffff);
-{ uint32_t x80 = ((x75 + x55) + x78);
-{ uint32_t x81 = (x80 >> 0x1b);
-{ uint32_t x82 = (x80 & 0x7ffffff);
-{ uint32_t x83 = (x58 + x78);
-{ uint32_t x84 = (x83 >> 0x1b);
-{ uint32_t x85 = (x83 & 0x7ffffff);
-out[0] = x79;
-out[1] = x67;
-out[2] = x81 + x61;
-out[3] = x82;
-out[4] = x76;
-out[5] = x70;
-out[6] = x84 + x64;
-out[7] = x85;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint64_t x32 = (((uint64_t)(x11 + x16) * (x25 + x30)) - ((uint64_t)x11 * x25));
+ { uint64_t x33 = ((((uint64_t)(x9 + x17) * (x25 + x30)) + ((uint64_t)(x11 + x16) * (x23 + x31))) - (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)));
+ { uint64_t x34 = ((((uint64_t)(x7 + x15) * (x25 + x30)) + (((uint64_t)(x9 + x17) * (x23 + x31)) + ((uint64_t)(x11 + x16) * (x21 + x29)))) - (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21))));
+ { uint64_t x35 = ((((uint64_t)(x5 + x13) * (x25 + x30)) + (((uint64_t)(x7 + x15) * (x23 + x31)) + (((uint64_t)(x9 + x17) * (x21 + x29)) + ((uint64_t)(x11 + x16) * (x19 + x27))))) - (((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))));
+ { uint64_t x36 = ((((uint64_t)(x5 + x13) * (x23 + x31)) + (((uint64_t)(x7 + x15) * (x21 + x29)) + ((uint64_t)(x9 + x17) * (x19 + x27)))) - (((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))));
+ { uint64_t x37 = ((((uint64_t)(x5 + x13) * (x21 + x29)) + ((uint64_t)(x7 + x15) * (x19 + x27))) - (((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)));
+ { uint64_t x38 = (((uint64_t)(x5 + x13) * (x19 + x27)) - ((uint64_t)x5 * x19));
+ { uint64_t x39 = (((((uint64_t)x11 * x25) + ((uint64_t)x16 * x30)) + x36) + x32);
+ { uint64_t x40 = ((((((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)) + (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))) + x37) + x33);
+ { uint64_t x41 = ((((((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21))) + (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))) + x38) + x34);
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) + (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))));
+ { uint64_t x43 = (((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((uint64_t)x17 * x27)))) + x32);
+ { uint64_t x44 = (((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27))) + x33);
+ { uint64_t x45 = ((((uint64_t)x5 * x19) + ((uint64_t)x13 * x27)) + x34);
+ { uint64_t x46 = (x42 >> 0x1b);
+ { uint32_t x47 = ((uint32_t)x42 & 0x7ffffff);
+ { uint64_t x48 = (x35 >> 0x1b);
+ { uint32_t x49 = ((uint32_t)x35 & 0x7ffffff);
+ { uint64_t x50 = ((0x8000000 * x48) + x49);
+ { uint64_t x51 = (x50 >> 0x1b);
+ { uint32_t x52 = ((uint32_t)x50 & 0x7ffffff);
+ { uint64_t x53 = ((x46 + x41) + x51);
+ { uint64_t x54 = (x53 >> 0x1b);
+ { uint32_t x55 = ((uint32_t)x53 & 0x7ffffff);
+ { uint64_t x56 = (x45 + x51);
+ { uint64_t x57 = (x56 >> 0x1b);
+ { uint32_t x58 = ((uint32_t)x56 & 0x7ffffff);
+ { uint64_t x59 = (x54 + x40);
+ { uint64_t x60 = (x59 >> 0x1b);
+ { uint32_t x61 = ((uint32_t)x59 & 0x7ffffff);
+ { uint64_t x62 = (x57 + x44);
+ { uint64_t x63 = (x62 >> 0x1b);
+ { uint32_t x64 = ((uint32_t)x62 & 0x7ffffff);
+ { uint64_t x65 = (x60 + x39);
+ { uint64_t x66 = (x65 >> 0x1b);
+ { uint32_t x67 = ((uint32_t)x65 & 0x7ffffff);
+ { uint64_t x68 = (x63 + x43);
+ { uint64_t x69 = (x68 >> 0x1b);
+ { uint32_t x70 = ((uint32_t)x68 & 0x7ffffff);
+ { uint64_t x71 = (x66 + x52);
+ { uint32_t x72 = (uint32_t) (x71 >> 0x1b);
+ { uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
+ { uint64_t x74 = (x69 + x47);
+ { uint32_t x75 = (uint32_t) (x74 >> 0x1b);
+ { uint32_t x76 = ((uint32_t)x74 & 0x7ffffff);
+ { uint64_t x77 = (((uint64_t)0x8000000 * x72) + x73);
+ { uint32_t x78 = (uint32_t) (x77 >> 0x1b);
+ { uint32_t x79 = ((uint32_t)x77 & 0x7ffffff);
+ { uint32_t x80 = ((x75 + x55) + x78);
+ { uint32_t x81 = (x80 >> 0x1b);
+ { uint32_t x82 = (x80 & 0x7ffffff);
+ { uint32_t x83 = (x58 + x78);
+ { uint32_t x84 = (x83 >> 0x1b);
+ { uint32_t x85 = (x83 & 0x7ffffff);
+ out[0] = x85;
+ out[1] = (x84 + x64);
+ out[2] = x70;
+ out[3] = x76;
+ out[4] = x82;
+ out[5] = (x81 + x61);
+ out[6] = x67;
+ out[7] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e216m2e108m1/fesquare.c b/src/Specific/solinas32_2e216m2e108m1/fesquare.c
index 88056d14c..aee2bd170 100644
--- a/src/Specific/solinas32_2e216m2e108m1/fesquare.c
+++ b/src/Specific/solinas32_2e216m2e108m1/fesquare.c
@@ -1,83 +1,73 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (((uint64_t)(x8 + x13) * (x8 + x13)) - ((uint64_t)x8 * x8));
-{ uint64_t x16 = ((((uint64_t)(x6 + x14) * (x8 + x13)) + ((uint64_t)(x8 + x13) * (x6 + x14))) - (((uint64_t)x6 * x8) + ((uint64_t)x8 * x6)));
-{ uint64_t x17 = ((((uint64_t)(x4 + x12) * (x8 + x13)) + (((uint64_t)(x6 + x14) * (x6 + x14)) + ((uint64_t)(x8 + x13) * (x4 + x12)))) - (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + ((uint64_t)x8 * x4))));
-{ uint64_t x18 = ((((uint64_t)(x2 + x10) * (x8 + x13)) + (((uint64_t)(x4 + x12) * (x6 + x14)) + (((uint64_t)(x6 + x14) * (x4 + x12)) + ((uint64_t)(x8 + x13) * (x2 + x10))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
-{ uint64_t x19 = ((((uint64_t)(x2 + x10) * (x6 + x14)) + (((uint64_t)(x4 + x12) * (x4 + x12)) + ((uint64_t)(x6 + x14) * (x2 + x10)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
-{ uint64_t x20 = ((((uint64_t)(x2 + x10) * (x4 + x12)) + ((uint64_t)(x4 + x12) * (x2 + x10))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
-{ uint64_t x21 = (((uint64_t)(x2 + x10) * (x2 + x10)) - ((uint64_t)x2 * x2));
-{ uint64_t x22 = (((((uint64_t)x8 * x8) + ((uint64_t)x13 * x13)) + x19) + x15);
-{ uint64_t x23 = ((((((uint64_t)x6 * x8) + ((uint64_t)x8 * x6)) + (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))) + x20) + x16);
-{ uint64_t x24 = ((((((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + ((uint64_t)x8 * x4))) + (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))) + x21) + x17);
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))));
-{ uint64_t x26 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10)))) + x15);
-{ uint64_t x27 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x10 * x12) + ((uint64_t)x12 * x10))) + x16);
-{ uint64_t x28 = ((((uint64_t)x2 * x2) + ((uint64_t)x10 * x10)) + x17);
-{ uint64_t x29 = (x25 >> 0x1b);
-{ uint32_t x30 = ((uint32_t)x25 & 0x7ffffff);
-{ uint64_t x31 = (x18 >> 0x1b);
-{ uint32_t x32 = ((uint32_t)x18 & 0x7ffffff);
-{ uint64_t x33 = ((0x8000000 * x31) + x32);
-{ uint64_t x34 = (x33 >> 0x1b);
-{ uint32_t x35 = ((uint32_t)x33 & 0x7ffffff);
-{ uint64_t x36 = ((x29 + x24) + x34);
-{ uint64_t x37 = (x36 >> 0x1b);
-{ uint32_t x38 = ((uint32_t)x36 & 0x7ffffff);
-{ uint64_t x39 = (x28 + x34);
-{ uint64_t x40 = (x39 >> 0x1b);
-{ uint32_t x41 = ((uint32_t)x39 & 0x7ffffff);
-{ uint64_t x42 = (x37 + x23);
-{ uint64_t x43 = (x42 >> 0x1b);
-{ uint32_t x44 = ((uint32_t)x42 & 0x7ffffff);
-{ uint64_t x45 = (x40 + x27);
-{ uint64_t x46 = (x45 >> 0x1b);
-{ uint32_t x47 = ((uint32_t)x45 & 0x7ffffff);
-{ uint64_t x48 = (x43 + x22);
-{ uint64_t x49 = (x48 >> 0x1b);
-{ uint32_t x50 = ((uint32_t)x48 & 0x7ffffff);
-{ uint64_t x51 = (x46 + x26);
-{ uint64_t x52 = (x51 >> 0x1b);
-{ uint32_t x53 = ((uint32_t)x51 & 0x7ffffff);
-{ uint64_t x54 = (x49 + x35);
-{ uint32_t x55 = (uint32_t) (x54 >> 0x1b);
-{ uint32_t x56 = ((uint32_t)x54 & 0x7ffffff);
-{ uint64_t x57 = (x52 + x30);
-{ uint32_t x58 = (uint32_t) (x57 >> 0x1b);
-{ uint32_t x59 = ((uint32_t)x57 & 0x7ffffff);
-{ uint64_t x60 = (((uint64_t)0x8000000 * x55) + x56);
-{ uint32_t x61 = (uint32_t) (x60 >> 0x1b);
-{ uint32_t x62 = ((uint32_t)x60 & 0x7ffffff);
-{ uint32_t x63 = ((x58 + x38) + x61);
-{ uint32_t x64 = (x63 >> 0x1b);
-{ uint32_t x65 = (x63 & 0x7ffffff);
-{ uint32_t x66 = (x41 + x61);
-{ uint32_t x67 = (x66 >> 0x1b);
-{ uint32_t x68 = (x66 & 0x7ffffff);
-out[0] = x62;
-out[1] = x50;
-out[2] = x64 + x44;
-out[3] = x65;
-out[4] = x59;
-out[5] = x53;
-out[6] = x67 + x47;
-out[7] = x68;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x15 = (((uint64_t)(x8 + x13) * (x8 + x13)) - ((uint64_t)x8 * x8));
+ { uint64_t x16 = ((((uint64_t)(x6 + x14) * (x8 + x13)) + ((uint64_t)(x8 + x13) * (x6 + x14))) - (((uint64_t)x6 * x8) + ((uint64_t)x8 * x6)));
+ { uint64_t x17 = ((((uint64_t)(x4 + x12) * (x8 + x13)) + (((uint64_t)(x6 + x14) * (x6 + x14)) + ((uint64_t)(x8 + x13) * (x4 + x12)))) - (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + ((uint64_t)x8 * x4))));
+ { uint64_t x18 = ((((uint64_t)(x2 + x10) * (x8 + x13)) + (((uint64_t)(x4 + x12) * (x6 + x14)) + (((uint64_t)(x6 + x14) * (x4 + x12)) + ((uint64_t)(x8 + x13) * (x2 + x10))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+ { uint64_t x19 = ((((uint64_t)(x2 + x10) * (x6 + x14)) + (((uint64_t)(x4 + x12) * (x4 + x12)) + ((uint64_t)(x6 + x14) * (x2 + x10)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+ { uint64_t x20 = ((((uint64_t)(x2 + x10) * (x4 + x12)) + ((uint64_t)(x4 + x12) * (x2 + x10))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+ { uint64_t x21 = (((uint64_t)(x2 + x10) * (x2 + x10)) - ((uint64_t)x2 * x2));
+ { uint64_t x22 = (((((uint64_t)x8 * x8) + ((uint64_t)x13 * x13)) + x19) + x15);
+ { uint64_t x23 = ((((((uint64_t)x6 * x8) + ((uint64_t)x8 * x6)) + (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))) + x20) + x16);
+ { uint64_t x24 = ((((((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + ((uint64_t)x8 * x4))) + (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))) + x21) + x17);
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))));
+ { uint64_t x26 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10)))) + x15);
+ { uint64_t x27 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x10 * x12) + ((uint64_t)x12 * x10))) + x16);
+ { uint64_t x28 = ((((uint64_t)x2 * x2) + ((uint64_t)x10 * x10)) + x17);
+ { uint64_t x29 = (x25 >> 0x1b);
+ { uint32_t x30 = ((uint32_t)x25 & 0x7ffffff);
+ { uint64_t x31 = (x18 >> 0x1b);
+ { uint32_t x32 = ((uint32_t)x18 & 0x7ffffff);
+ { uint64_t x33 = ((0x8000000 * x31) + x32);
+ { uint64_t x34 = (x33 >> 0x1b);
+ { uint32_t x35 = ((uint32_t)x33 & 0x7ffffff);
+ { uint64_t x36 = ((x29 + x24) + x34);
+ { uint64_t x37 = (x36 >> 0x1b);
+ { uint32_t x38 = ((uint32_t)x36 & 0x7ffffff);
+ { uint64_t x39 = (x28 + x34);
+ { uint64_t x40 = (x39 >> 0x1b);
+ { uint32_t x41 = ((uint32_t)x39 & 0x7ffffff);
+ { uint64_t x42 = (x37 + x23);
+ { uint64_t x43 = (x42 >> 0x1b);
+ { uint32_t x44 = ((uint32_t)x42 & 0x7ffffff);
+ { uint64_t x45 = (x40 + x27);
+ { uint64_t x46 = (x45 >> 0x1b);
+ { uint32_t x47 = ((uint32_t)x45 & 0x7ffffff);
+ { uint64_t x48 = (x43 + x22);
+ { uint64_t x49 = (x48 >> 0x1b);
+ { uint32_t x50 = ((uint32_t)x48 & 0x7ffffff);
+ { uint64_t x51 = (x46 + x26);
+ { uint64_t x52 = (x51 >> 0x1b);
+ { uint32_t x53 = ((uint32_t)x51 & 0x7ffffff);
+ { uint64_t x54 = (x49 + x35);
+ { uint32_t x55 = (uint32_t) (x54 >> 0x1b);
+ { uint32_t x56 = ((uint32_t)x54 & 0x7ffffff);
+ { uint64_t x57 = (x52 + x30);
+ { uint32_t x58 = (uint32_t) (x57 >> 0x1b);
+ { uint32_t x59 = ((uint32_t)x57 & 0x7ffffff);
+ { uint64_t x60 = (((uint64_t)0x8000000 * x55) + x56);
+ { uint32_t x61 = (uint32_t) (x60 >> 0x1b);
+ { uint32_t x62 = ((uint32_t)x60 & 0x7ffffff);
+ { uint32_t x63 = ((x58 + x38) + x61);
+ { uint32_t x64 = (x63 >> 0x1b);
+ { uint32_t x65 = (x63 & 0x7ffffff);
+ { uint32_t x66 = (x41 + x61);
+ { uint32_t x67 = (x66 >> 0x1b);
+ { uint32_t x68 = (x66 & 0x7ffffff);
+ out[0] = x68;
+ out[1] = (x67 + x47);
+ out[2] = x53;
+ out[3] = x59;
+ out[4] = x65;
+ out[5] = (x64 + x44);
+ out[6] = x50;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e216m2e108m1/freeze.c b/src/Specific/solinas32_2e216m2e108m1/freeze.c
index f29663ddb..a47de88e6 100644
--- a/src/Specific/solinas32_2e216m2e108m1/freeze.c
+++ b/src/Specific/solinas32_2e216m2e108m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffff;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffff);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x4, 0x7ffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x6, 0x7ffffff);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0x7ffffff);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0x7fffffe);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0x7ffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0x7ffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0x7ffffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff);
+ { uint32_t x40 = (x39 & 0x7ffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x44 = (x39 & 0x7ffffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint32_t x48 = (x39 & 0x7ffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint32_t x52 = (x39 & 0x7ffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint32_t x56 = (x39 & 0x7fffffe);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint32_t x60 = (x39 & 0x7ffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint32_t x64 = (x39 & 0x7ffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint32_t x68 = (x39 & 0x7ffffff);
+ { uint32_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e221m3/femul.c b/src/Specific/solinas32_2e221m3/femul.c
index 53e74bcf0..92caba461 100644
--- a/src/Specific/solinas32_2e221m3/femul.c
+++ b/src/Specific/solinas32_2e221m3/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x3 * ((uint64_t)x20 * x38)));
-{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x3 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
-{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x3 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
-{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x3 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x3 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
-{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x3 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
-{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x3 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
-{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x3 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
-{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x3 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
-{ uint32_t x50 = (uint32_t) (x49 >> 0x17);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
-{ uint64_t x52 = (x50 + x48);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x16);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
-{ uint64_t x55 = (x53 + x47);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x16);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
-{ uint64_t x58 = (x56 + x46);
-{ uint32_t x59 = (uint32_t) (x58 >> 0x16);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
-{ uint64_t x61 = (x59 + x45);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x16);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
-{ uint64_t x64 = (x62 + x44);
-{ uint32_t x65 = (uint32_t) (x64 >> 0x16);
-{ uint32_t x66 = ((uint32_t)x64 & 0x3fffff);
-{ uint64_t x67 = (x65 + x43);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x16);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
-{ uint64_t x70 = (x68 + x42);
-{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
-{ uint64_t x73 = (x71 + x41);
-{ uint32_t x74 = (uint32_t) (x73 >> 0x16);
-{ uint32_t x75 = ((uint32_t)x73 & 0x3fffff);
-{ uint64_t x76 = (x74 + x40);
-{ uint32_t x77 = (uint32_t) (x76 >> 0x16);
-{ uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
-{ uint32_t x79 = (x51 + (0x3 * x77));
-{ uint32_t x80 = (x79 >> 0x17);
-{ uint32_t x81 = (x79 & 0x7fffff);
-{ uint32_t x82 = (x80 + x54);
-{ uint32_t x83 = (x82 >> 0x16);
-{ uint32_t x84 = (x82 & 0x3fffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9];
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x3 * ((uint64_t)x20 * x38)));
+ { uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x3 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+ { uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x3 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+ { uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x3 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x3 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+ { uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x3 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+ { uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x3 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+ { uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x3 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+ { uint64_t x49 = (((uint64_t)x5 * x23) + (0x3 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
+ { uint32_t x50 = (uint32_t) (x49 >> 0x17);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+ { uint64_t x52 = (x50 + x48);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x16);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+ { uint64_t x55 = (x53 + x47);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x16);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+ { uint64_t x58 = (x56 + x46);
+ { uint32_t x59 = (uint32_t) (x58 >> 0x16);
+ { uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+ { uint64_t x61 = (x59 + x45);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x16);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+ { uint64_t x64 = (x62 + x44);
+ { uint32_t x65 = (uint32_t) (x64 >> 0x16);
+ { uint32_t x66 = ((uint32_t)x64 & 0x3fffff);
+ { uint64_t x67 = (x65 + x43);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x16);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
+ { uint64_t x70 = (x68 + x42);
+ { uint32_t x71 = (uint32_t) (x70 >> 0x16);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+ { uint64_t x73 = (x71 + x41);
+ { uint32_t x74 = (uint32_t) (x73 >> 0x16);
+ { uint32_t x75 = ((uint32_t)x73 & 0x3fffff);
+ { uint64_t x76 = (x74 + x40);
+ { uint32_t x77 = (uint32_t) (x76 >> 0x16);
+ { uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
+ { uint32_t x79 = (x51 + (0x3 * x77));
+ { uint32_t x80 = (x79 >> 0x17);
+ { uint32_t x81 = (x79 & 0x7fffff);
+ { uint32_t x82 = (x80 + x54);
+ { uint32_t x83 = (x82 >> 0x16);
+ { uint32_t x84 = (x82 & 0x3fffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e221m3/fesquare.c b/src/Specific/solinas32_2e221m3/fesquare.c
index 8c7950578..8f03a389f 100644
--- a/src/Specific/solinas32_2e221m3/fesquare.c
+++ b/src/Specific/solinas32_2e221m3/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * ((uint64_t)x17 * x17)));
-{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
-{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x3 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
-{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
-{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x3 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
-{ uint32_t x29 = (uint32_t) (x28 >> 0x17);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
-{ uint64_t x31 = (x29 + x27);
-{ uint32_t x32 = (uint32_t) (x31 >> 0x16);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
-{ uint64_t x34 = (x32 + x26);
-{ uint32_t x35 = (uint32_t) (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint64_t x37 = (x35 + x25);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x16);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
-{ uint64_t x40 = (x38 + x24);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x16);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
-{ uint64_t x43 = (x41 + x23);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x16);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
-{ uint64_t x46 = (x44 + x22);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x16);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
-{ uint64_t x49 = (x47 + x21);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x16);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
-{ uint64_t x52 = (x50 + x20);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x16);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
-{ uint64_t x55 = (x53 + x19);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x16);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
-{ uint32_t x58 = (x30 + (0x3 * x56));
-{ uint32_t x59 = (x58 >> 0x17);
-{ uint32_t x60 = (x58 & 0x7fffff);
-{ uint32_t x61 = (x59 + x33);
-{ uint32_t x62 = (x61 >> 0x16);
-{ uint32_t x63 = (x61 & 0x3fffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));
+ { uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * ((uint64_t)x17 * x17)));
+ { uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+ { uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x3 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+ { uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+ { uint64_t x28 = (((uint64_t)x2 * x2) + (0x3 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
+ { uint32_t x29 = (uint32_t) (x28 >> 0x17);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
+ { uint64_t x31 = (x29 + x27);
+ { uint32_t x32 = (uint32_t) (x31 >> 0x16);
+ { uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
+ { uint64_t x34 = (x32 + x26);
+ { uint32_t x35 = (uint32_t) (x34 >> 0x16);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+ { uint64_t x37 = (x35 + x25);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x16);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+ { uint64_t x40 = (x38 + x24);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x16);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+ { uint64_t x43 = (x41 + x23);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x16);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+ { uint64_t x46 = (x44 + x22);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x16);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+ { uint64_t x49 = (x47 + x21);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x16);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+ { uint64_t x52 = (x50 + x20);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x16);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+ { uint64_t x55 = (x53 + x19);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x16);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+ { uint32_t x58 = (x30 + (0x3 * x56));
+ { uint32_t x59 = (x58 >> 0x17);
+ { uint32_t x60 = (x58 & 0x7fffff);
+ { uint32_t x61 = (x59 + x33);
+ { uint32_t x62 = (x61 >> 0x16);
+ { uint32_t x63 = (x61 & 0x3fffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e221m3/freeze.c b/src/Specific/solinas32_2e221m3/freeze.c
index 88476affb..79656b2c4 100644
--- a/src/Specific/solinas32_2e221m3/freeze.c
+++ b/src/Specific/solinas32_2e221m3/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffd;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffd);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0x3fffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x3fffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0x3fffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x3fffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0x3fffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x3fffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0x3fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x3fffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0x3fffff);
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff);
+ { uint32_t x50 = (x49 & 0x7ffffd);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint32_t x54 = (x49 & 0x3fffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x3fffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0x3fffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x3fffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0x3fffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x3fffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0x3fffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x3fffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0x3fffff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e222m117/femul.c b/src/Specific/solinas32_2e222m117/femul.c
index c7fac756f..a890cbb84 100644
--- a/src/Specific/solinas32_2e222m117/femul.c
+++ b/src/Specific/solinas32_2e222m117/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x75 * ((uint64_t)x20 * x38)));
-{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x75 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
-{ uint64_t x43 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x17 * x23))))))) + (0x75 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
-{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x75 * ((0x2 * ((uint64_t)x17 * x38)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + (0x2 * ((uint64_t)x20 * x35)))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x75 * (((uint64_t)x15 * x38) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((uint64_t)x20 * x33)))))));
-{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x75 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + ((0x2 * ((uint64_t)x17 * x37)) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
-{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x75 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((0x2 * ((uint64_t)x17 * x35)) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
-{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x75 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
-{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x75 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
-{ uint64_t x50 = (x49 >> 0x17);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
-{ uint64_t x52 = (x50 + x48);
-{ uint64_t x53 = (x52 >> 0x16);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
-{ uint64_t x55 = (x53 + x47);
-{ uint64_t x56 = (x55 >> 0x16);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
-{ uint64_t x58 = (x56 + x46);
-{ uint64_t x59 = (x58 >> 0x16);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
-{ uint64_t x61 = (x59 + x45);
-{ uint64_t x62 = (x61 >> 0x16);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
-{ uint64_t x64 = (x62 + x44);
-{ uint64_t x65 = (x64 >> 0x17);
-{ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
-{ uint64_t x67 = (x65 + x43);
-{ uint64_t x68 = (x67 >> 0x16);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
-{ uint64_t x70 = (x68 + x42);
-{ uint64_t x71 = (x70 >> 0x16);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
-{ uint64_t x73 = (x71 + x41);
-{ uint64_t x74 = (x73 >> 0x16);
-{ uint32_t x75 = ((uint32_t)x73 & 0x3fffff);
-{ uint64_t x76 = (x74 + x40);
-{ uint32_t x77 = (uint32_t) (x76 >> 0x16);
-{ uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
-{ uint64_t x79 = (x51 + ((uint64_t)0x75 * x77));
-{ uint32_t x80 = (uint32_t) (x79 >> 0x17);
-{ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
-{ uint32_t x82 = (x80 + x54);
-{ uint32_t x83 = (x82 >> 0x16);
-{ uint32_t x84 = (x82 & 0x3fffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9];
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x75 * ((uint64_t)x20 * x38)));
+ { uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x75 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+ { uint64_t x43 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x17 * x23))))))) + (0x75 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+ { uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x75 * ((0x2 * ((uint64_t)x17 * x38)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + (0x2 * ((uint64_t)x20 * x35)))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x75 * (((uint64_t)x15 * x38) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((uint64_t)x20 * x33)))))));
+ { uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x75 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + ((0x2 * ((uint64_t)x17 * x37)) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+ { uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x75 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((0x2 * ((uint64_t)x17 * x35)) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+ { uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x75 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+ { uint64_t x49 = (((uint64_t)x5 * x23) + (0x75 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
+ { uint64_t x50 = (x49 >> 0x17);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+ { uint64_t x52 = (x50 + x48);
+ { uint64_t x53 = (x52 >> 0x16);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+ { uint64_t x55 = (x53 + x47);
+ { uint64_t x56 = (x55 >> 0x16);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+ { uint64_t x58 = (x56 + x46);
+ { uint64_t x59 = (x58 >> 0x16);
+ { uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+ { uint64_t x61 = (x59 + x45);
+ { uint64_t x62 = (x61 >> 0x16);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+ { uint64_t x64 = (x62 + x44);
+ { uint64_t x65 = (x64 >> 0x17);
+ { uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+ { uint64_t x67 = (x65 + x43);
+ { uint64_t x68 = (x67 >> 0x16);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
+ { uint64_t x70 = (x68 + x42);
+ { uint64_t x71 = (x70 >> 0x16);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+ { uint64_t x73 = (x71 + x41);
+ { uint64_t x74 = (x73 >> 0x16);
+ { uint32_t x75 = ((uint32_t)x73 & 0x3fffff);
+ { uint64_t x76 = (x74 + x40);
+ { uint32_t x77 = (uint32_t) (x76 >> 0x16);
+ { uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
+ { uint64_t x79 = (x51 + ((uint64_t)0x75 * x77));
+ { uint32_t x80 = (uint32_t) (x79 >> 0x17);
+ { uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+ { uint32_t x82 = (x80 + x54);
+ { uint32_t x83 = (x82 >> 0x16);
+ { uint32_t x84 = (x82 & 0x3fffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e222m117/fesquare.c b/src/Specific/solinas32_2e222m117/fesquare.c
index c6de6e097..5d1995438 100644
--- a/src/Specific/solinas32_2e222m117/fesquare.c
+++ b/src/Specific/solinas32_2e222m117/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x75 * ((uint64_t)x17 * x17)));
-{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x75 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
-{ uint64_t x22 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x75 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
-{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x75 * ((0x2 * ((uint64_t)x14 * x17)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (0x2 * ((uint64_t)x17 * x14)))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x75 * (((uint64_t)x12 * x17) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((uint64_t)x17 * x12)))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x75 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x75 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + ((0x2 * ((uint64_t)x14 * x14)) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x75 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
-{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x75 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + (((uint64_t)x12 * x12) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
-{ uint64_t x29 = (x28 >> 0x17);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
-{ uint64_t x31 = (x29 + x27);
-{ uint64_t x32 = (x31 >> 0x16);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
-{ uint64_t x34 = (x32 + x26);
-{ uint64_t x35 = (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint64_t x37 = (x35 + x25);
-{ uint64_t x38 = (x37 >> 0x16);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
-{ uint64_t x40 = (x38 + x24);
-{ uint64_t x41 = (x40 >> 0x16);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
-{ uint64_t x43 = (x41 + x23);
-{ uint64_t x44 = (x43 >> 0x17);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7fffff);
-{ uint64_t x46 = (x44 + x22);
-{ uint64_t x47 = (x46 >> 0x16);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
-{ uint64_t x49 = (x47 + x21);
-{ uint64_t x50 = (x49 >> 0x16);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
-{ uint64_t x52 = (x50 + x20);
-{ uint64_t x53 = (x52 >> 0x16);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
-{ uint64_t x55 = (x53 + x19);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x16);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
-{ uint64_t x58 = (x30 + ((uint64_t)0x75 * x56));
-{ uint32_t x59 = (uint32_t) (x58 >> 0x17);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
-{ uint32_t x61 = (x59 + x33);
-{ uint32_t x62 = (x61 >> 0x16);
-{ uint32_t x63 = (x61 & 0x3fffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));
+ { uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x75 * ((uint64_t)x17 * x17)));
+ { uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x75 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+ { uint64_t x22 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x75 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+ { uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x75 * ((0x2 * ((uint64_t)x14 * x17)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (0x2 * ((uint64_t)x17 * x14)))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x75 * (((uint64_t)x12 * x17) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((uint64_t)x17 * x12)))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x75 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x75 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + ((0x2 * ((uint64_t)x14 * x14)) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x75 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+ { uint64_t x28 = (((uint64_t)x2 * x2) + (0x75 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + (((uint64_t)x12 * x12) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
+ { uint64_t x29 = (x28 >> 0x17);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
+ { uint64_t x31 = (x29 + x27);
+ { uint64_t x32 = (x31 >> 0x16);
+ { uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
+ { uint64_t x34 = (x32 + x26);
+ { uint64_t x35 = (x34 >> 0x16);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+ { uint64_t x37 = (x35 + x25);
+ { uint64_t x38 = (x37 >> 0x16);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+ { uint64_t x40 = (x38 + x24);
+ { uint64_t x41 = (x40 >> 0x16);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+ { uint64_t x43 = (x41 + x23);
+ { uint64_t x44 = (x43 >> 0x17);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7fffff);
+ { uint64_t x46 = (x44 + x22);
+ { uint64_t x47 = (x46 >> 0x16);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+ { uint64_t x49 = (x47 + x21);
+ { uint64_t x50 = (x49 >> 0x16);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+ { uint64_t x52 = (x50 + x20);
+ { uint64_t x53 = (x52 >> 0x16);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+ { uint64_t x55 = (x53 + x19);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x16);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+ { uint64_t x58 = (x30 + ((uint64_t)0x75 * x56));
+ { uint32_t x59 = (uint32_t) (x58 >> 0x17);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+ { uint32_t x61 = (x59 + x33);
+ { uint32_t x62 = (x61 >> 0x16);
+ { uint32_t x63 = (x61 & 0x3fffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e222m117/freeze.c b/src/Specific/solinas32_2e222m117/freeze.c
index abd6941de..10c86762a 100644
--- a/src/Specific/solinas32_2e222m117/freeze.c
+++ b/src/Specific/solinas32_2e222m117/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fff8b;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fff8b);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0x3fffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x3fffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0x3fffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x3fffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0x7fffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x3fffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0x3fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x3fffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0x3fffff);
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff);
+ { uint32_t x50 = (x49 & 0x7fff8b);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint32_t x54 = (x49 & 0x3fffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x3fffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0x3fffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x3fffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0x7fffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x3fffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0x3fffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x3fffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0x3fffff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e224m2e96p1/freeze.c b/src/Specific/solinas32_2e224m2e96p1/freeze.c
index b2ffc8354..f2dffb5bc 100644
--- a/src/Specific/solinas32_2e224m2e96p1/freeze.c
+++ b/src/Specific/solinas32_2e224m2e96p1/freeze.c
@@ -1,25 +1,42 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = ℤ x17 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 3 Syntax.TWord 5 Syntax.TZ 0x0;
-out[2] = x2;
-out[3] = 0x1;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, ℤ x17 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (0x0, Return x2, 0x1);
+ { uint32_t x19, ℤ x20 = Op (Syntax.SubWithGetBorrow 28 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x17, Return x4, 0x0);
+ { uint32_t x22, ℤ x23 = Op (Syntax.SubWithGetBorrow 28 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x20, Return x6, 0x0);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 28 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0xffff000);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0xfffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0xfffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0xfffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0xfffffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff);
+ { uint8_t x40 = ((uint8_t)x39 & 0x1);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, 0x0);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x22, 0x0);
+ { uint32_t x50 = (x39 & 0xffff000);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x25, Return x50);
+ { uint32_t x54 = (x39 & 0xfffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x28, Return x54);
+ { uint32_t x58 = (x39 & 0xfffffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x31, Return x58);
+ { uint32_t x62 = (x39 & 0xfffffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x34, Return x62);
+ { uint32_t x66 = (x39 & 0xfffffff);
+ { uint32_t x68, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x37, Return x66);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = x48;
+ out[3] = x52;
+ out[4] = x56;
+ out[5] = x60;
+ out[6] = x64;
+ out[7] = x68;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e226m5/femul.c b/src/Specific/solinas32_2e226m5/femul.c
index ffc389e14..894f84ba7 100644
--- a/src/Specific/solinas32_2e226m5/femul.c
+++ b/src/Specific/solinas32_2e226m5/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19))))))));
-{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x5 * ((uint64_t)x16 * x30)));
-{ uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x5 * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
-{ ℤ x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x15 * x30)) + ((0x2 * ((uint64_t)x17 * x31)) + (0x2 * ((uint64_t)x16 * x29))))));
-{ ℤ x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) +ℤ (0x5 *ℤ (((uint64_t)x13 * x30) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x16 * x27))))));
-{ ℤ x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) +ℤ (0x5 *ℤ (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
-{ ℤ x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) +ℤ (0x5 *ℤ (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
-{ ℤ x39 = (((uint64_t)x5 * x19) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
-{ uint64_t x40 = (x39 >> 0x1d);
-{ uint32_t x41 = (x39 & 0x1fffffff);
-{ ℤ x42 = (x40 +ℤ x38);
-{ uint64_t x43 = (x42 >> 0x1c);
-{ uint32_t x44 = (x42 & 0xfffffff);
-{ ℤ x45 = (x43 +ℤ x37);
-{ uint64_t x46 = (x45 >> 0x1c);
-{ uint32_t x47 = (x45 & 0xfffffff);
-{ ℤ x48 = (x46 +ℤ x36);
-{ uint64_t x49 = (x48 >> 0x1c);
-{ uint32_t x50 = (x48 & 0xfffffff);
-{ ℤ x51 = (x49 +ℤ x35);
-{ uint64_t x52 = (x51 >> 0x1d);
-{ uint32_t x53 = (x51 & 0x1fffffff);
-{ uint64_t x54 = (x52 + x34);
-{ uint64_t x55 = (x54 >> 0x1c);
-{ uint32_t x56 = ((uint32_t)x54 & 0xfffffff);
-{ uint64_t x57 = (x55 + x33);
-{ uint64_t x58 = (x57 >> 0x1c);
-{ uint32_t x59 = ((uint32_t)x57 & 0xfffffff);
-{ uint64_t x60 = (x58 + x32);
-{ uint64_t x61 = (x60 >> 0x1c);
-{ uint32_t x62 = ((uint32_t)x60 & 0xfffffff);
-{ uint64_t x63 = (x41 + (0x5 * x61));
-{ uint32_t x64 = (uint32_t) (x63 >> 0x1d);
-{ uint32_t x65 = ((uint32_t)x63 & 0x1fffffff);
-{ uint32_t x66 = (x64 + x44);
-{ uint32_t x67 = (x66 >> 0x1c);
-{ uint32_t x68 = (x66 & 0xfffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint32_t out[8], const uint32_t in1[8], const uint32_t in2[8]) {
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x30 = in2[7];
+ { const uint32_t x31 = in2[6];
+ { const uint32_t x29 = in2[5];
+ { const uint32_t x27 = in2[4];
+ { const uint32_t x25 = in2[3];
+ { const uint32_t x23 = in2[2];
+ { const uint32_t x21 = in2[1];
+ { const uint32_t x19 = in2[0];
+ { uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19))))))));
+ { uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x5 * ((uint64_t)x16 * x30)));
+ { uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x5 * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+ { ℤ x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x15 * x30)) + ((0x2 * ((uint64_t)x17 * x31)) + (0x2 * ((uint64_t)x16 * x29))))));
+ { ℤ x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) +ℤ (0x5 *ℤ (((uint64_t)x13 * x30) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x16 * x27))))));
+ { ℤ x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) +ℤ (0x5 *ℤ (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
+ { ℤ x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) +ℤ (0x5 *ℤ (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+ { ℤ x39 = (((uint64_t)x5 * x19) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
+ { uint64_t x40 = (x39 >> 0x1d);
+ { uint32_t x41 = (x39 & 0x1fffffff);
+ { ℤ x42 = (x40 +ℤ x38);
+ { uint64_t x43 = (x42 >> 0x1c);
+ { uint32_t x44 = (x42 & 0xfffffff);
+ { ℤ x45 = (x43 +ℤ x37);
+ { uint64_t x46 = (x45 >> 0x1c);
+ { uint32_t x47 = (x45 & 0xfffffff);
+ { ℤ x48 = (x46 +ℤ x36);
+ { uint64_t x49 = (x48 >> 0x1c);
+ { uint32_t x50 = (x48 & 0xfffffff);
+ { ℤ x51 = (x49 +ℤ x35);
+ { uint64_t x52 = (x51 >> 0x1d);
+ { uint32_t x53 = (x51 & 0x1fffffff);
+ { uint64_t x54 = (x52 + x34);
+ { uint64_t x55 = (x54 >> 0x1c);
+ { uint32_t x56 = ((uint32_t)x54 & 0xfffffff);
+ { uint64_t x57 = (x55 + x33);
+ { uint64_t x58 = (x57 >> 0x1c);
+ { uint32_t x59 = ((uint32_t)x57 & 0xfffffff);
+ { uint64_t x60 = (x58 + x32);
+ { uint64_t x61 = (x60 >> 0x1c);
+ { uint32_t x62 = ((uint32_t)x60 & 0xfffffff);
+ { uint64_t x63 = (x41 + (0x5 * x61));
+ { uint32_t x64 = (uint32_t) (x63 >> 0x1d);
+ { uint32_t x65 = ((uint32_t)x63 & 0x1fffffff);
+ { uint32_t x66 = (x64 + x44);
+ { uint32_t x67 = (x66 >> 0x1c);
+ { uint32_t x68 = (x66 & 0xfffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e226m5/fesquare.c b/src/Specific/solinas32_2e226m5/fesquare.c
index ca7577562..7323aea58 100644
--- a/src/Specific/solinas32_2e226m5/fesquare.c
+++ b/src/Specific/solinas32_2e226m5/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2))))))));
-{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * ((uint64_t)x13 * x13)));
-{ uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
-{ ℤ x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x12 * x13)) + ((0x2 * ((uint64_t)x14 * x14)) + (0x2 * ((uint64_t)x13 * x12))))));
-{ ℤ x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (0x5 *ℤ (((uint64_t)x10 * x13) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((uint64_t)x13 * x10))))));
-{ ℤ x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x5 *ℤ (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
-{ ℤ x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x5 *ℤ (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
-{ ℤ x22 = (((uint64_t)x2 * x2) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
-{ uint64_t x23 = (x22 >> 0x1d);
-{ uint32_t x24 = (x22 & 0x1fffffff);
-{ ℤ x25 = (x23 +ℤ x21);
-{ uint64_t x26 = (x25 >> 0x1c);
-{ uint32_t x27 = (x25 & 0xfffffff);
-{ ℤ x28 = (x26 +ℤ x20);
-{ uint64_t x29 = (x28 >> 0x1c);
-{ uint32_t x30 = (x28 & 0xfffffff);
-{ ℤ x31 = (x29 +ℤ x19);
-{ uint64_t x32 = (x31 >> 0x1c);
-{ uint32_t x33 = (x31 & 0xfffffff);
-{ ℤ x34 = (x32 +ℤ x18);
-{ uint64_t x35 = (x34 >> 0x1d);
-{ uint32_t x36 = (x34 & 0x1fffffff);
-{ uint64_t x37 = (x35 + x17);
-{ uint64_t x38 = (x37 >> 0x1c);
-{ uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
-{ uint64_t x40 = (x38 + x16);
-{ uint64_t x41 = (x40 >> 0x1c);
-{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
-{ uint64_t x43 = (x41 + x15);
-{ uint64_t x44 = (x43 >> 0x1c);
-{ uint32_t x45 = ((uint32_t)x43 & 0xfffffff);
-{ uint64_t x46 = (x24 + (0x5 * x44));
-{ uint32_t x47 = (uint32_t) (x46 >> 0x1d);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1fffffff);
-{ uint32_t x49 = (x47 + x27);
-{ uint32_t x50 = (x49 >> 0x1c);
-{ uint32_t x51 = (x49 & 0xfffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2))))))));
+ { uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * ((uint64_t)x13 * x13)));
+ { uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+ { ℤ x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x12 * x13)) + ((0x2 * ((uint64_t)x14 * x14)) + (0x2 * ((uint64_t)x13 * x12))))));
+ { ℤ x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (0x5 *ℤ (((uint64_t)x10 * x13) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((uint64_t)x13 * x10))))));
+ { ℤ x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x5 *ℤ (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
+ { ℤ x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x5 *ℤ (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+ { ℤ x22 = (((uint64_t)x2 * x2) +ℤ (0x5 *ℤ ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
+ { uint64_t x23 = (x22 >> 0x1d);
+ { uint32_t x24 = (x22 & 0x1fffffff);
+ { ℤ x25 = (x23 +ℤ x21);
+ { uint64_t x26 = (x25 >> 0x1c);
+ { uint32_t x27 = (x25 & 0xfffffff);
+ { ℤ x28 = (x26 +ℤ x20);
+ { uint64_t x29 = (x28 >> 0x1c);
+ { uint32_t x30 = (x28 & 0xfffffff);
+ { ℤ x31 = (x29 +ℤ x19);
+ { uint64_t x32 = (x31 >> 0x1c);
+ { uint32_t x33 = (x31 & 0xfffffff);
+ { ℤ x34 = (x32 +ℤ x18);
+ { uint64_t x35 = (x34 >> 0x1d);
+ { uint32_t x36 = (x34 & 0x1fffffff);
+ { uint64_t x37 = (x35 + x17);
+ { uint64_t x38 = (x37 >> 0x1c);
+ { uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
+ { uint64_t x40 = (x38 + x16);
+ { uint64_t x41 = (x40 >> 0x1c);
+ { uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+ { uint64_t x43 = (x41 + x15);
+ { uint64_t x44 = (x43 >> 0x1c);
+ { uint32_t x45 = ((uint32_t)x43 & 0xfffffff);
+ { uint64_t x46 = (x24 + (0x5 * x44));
+ { uint32_t x47 = (uint32_t) (x46 >> 0x1d);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1fffffff);
+ { uint32_t x49 = (x47 + x27);
+ { uint32_t x50 = (x49 >> 0x1c);
+ { uint32_t x51 = (x49 & 0xfffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e226m5/freeze.c b/src/Specific/solinas32_2e226m5/freeze.c
index 5251bb672..96d54a5c0 100644
--- a/src/Specific/solinas32_2e226m5/freeze.c
+++ b/src/Specific/solinas32_2e226m5/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffffb;;
+static void freeze(uint32_t out[8], const uint32_t in1[8]) {
+ { const uint32_t x13 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffb);
+ { uint32_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x17, Return x4, 0xfffffff);
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x20, Return x6, 0xfffffff);
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x8, 0xfffffff);
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x10, 0x1fffffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x12, 0xfffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x14, 0xfffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x13, 0xfffffff);
+ { uint32_t x39 = (uint32_t)cmovznz(x38, 0x0, 0xffffffff);
+ { uint32_t x40 = (x39 & 0x1ffffffb);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint32_t x44 = (x39 & 0xfffffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint32_t x48 = (x39 & 0xfffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint32_t x52 = (x39 & 0xfffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint32_t x56 = (x39 & 0x1fffffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint32_t x60 = (x39 & 0xfffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint32_t x64 = (x39 & 0xfffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint32_t x68 = (x39 & 0xfffffff);
+ { uint32_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e230m27/femul.c b/src/Specific/solinas32_2e230m27/femul.c
index ebefa4d02..75851dd8a 100644
--- a/src/Specific/solinas32_2e230m27/femul.c
+++ b/src/Specific/solinas32_2e230m27/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint64_t x40 = (((uint64_t)x5 * x38) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + (((uint64_t)x21 * x25) + ((uint64_t)x20 * x23))))))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x21 * x23))))))))) + (0x1b * ((uint64_t)x20 * x38)));
-{ uint64_t x42 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x19 * x23)))))))) + (0x1b * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
-{ uint64_t x43 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x17 * x23))))))) + (0x1b * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
-{ uint64_t x44 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((uint64_t)x15 * x23)))))) + (0x1b * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((uint64_t)x13 * x23))))) + (0x1b * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
-{ uint64_t x46 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)))) + (0x1b * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
-{ uint64_t x47 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + ((uint64_t)x9 * x23))) + (0x1b * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
-{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x1b * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
-{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x1b * (((uint64_t)x7 * x38) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + (((uint64_t)x21 * x27) + ((uint64_t)x20 * x25)))))))))));
-{ uint64_t x50 = (x49 >> 0x17);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
-{ uint64_t x52 = (x50 + x48);
-{ uint64_t x53 = (x52 >> 0x17);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
-{ uint64_t x55 = (x53 + x47);
-{ uint64_t x56 = (x55 >> 0x17);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
-{ uint64_t x58 = (x56 + x46);
-{ uint64_t x59 = (x58 >> 0x17);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
-{ uint64_t x61 = (x59 + x45);
-{ uint64_t x62 = (x61 >> 0x17);
-{ uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
-{ uint64_t x64 = (x62 + x44);
-{ uint64_t x65 = (x64 >> 0x17);
-{ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
-{ uint64_t x67 = (x65 + x43);
-{ uint64_t x68 = (x67 >> 0x17);
-{ uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
-{ uint64_t x70 = (x68 + x42);
-{ uint64_t x71 = (x70 >> 0x17);
-{ uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
-{ uint64_t x73 = (x71 + x41);
-{ uint32_t x74 = (uint32_t) (x73 >> 0x17);
-{ uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
-{ uint64_t x76 = (x74 + x40);
-{ uint32_t x77 = (uint32_t) (x76 >> 0x17);
-{ uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
-{ uint64_t x79 = (x51 + ((uint64_t)0x1b * x77));
-{ uint32_t x80 = (uint32_t) (x79 >> 0x17);
-{ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
-{ uint32_t x82 = (x80 + x54);
-{ uint32_t x83 = (x82 >> 0x17);
-{ uint32_t x84 = (x82 & 0x7fffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9];
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint64_t x40 = (((uint64_t)x5 * x38) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + (((uint64_t)x21 * x25) + ((uint64_t)x20 * x23))))))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x21 * x23))))))))) + (0x1b * ((uint64_t)x20 * x38)));
+ { uint64_t x42 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x19 * x23)))))))) + (0x1b * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+ { uint64_t x43 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x17 * x23))))))) + (0x1b * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+ { uint64_t x44 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((uint64_t)x15 * x23)))))) + (0x1b * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((uint64_t)x13 * x23))))) + (0x1b * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+ { uint64_t x46 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)))) + (0x1b * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+ { uint64_t x47 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + ((uint64_t)x9 * x23))) + (0x1b * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+ { uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x1b * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+ { uint64_t x49 = (((uint64_t)x5 * x23) + (0x1b * (((uint64_t)x7 * x38) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + (((uint64_t)x21 * x27) + ((uint64_t)x20 * x25)))))))))));
+ { uint64_t x50 = (x49 >> 0x17);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+ { uint64_t x52 = (x50 + x48);
+ { uint64_t x53 = (x52 >> 0x17);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+ { uint64_t x55 = (x53 + x47);
+ { uint64_t x56 = (x55 >> 0x17);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+ { uint64_t x58 = (x56 + x46);
+ { uint64_t x59 = (x58 >> 0x17);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+ { uint64_t x61 = (x59 + x45);
+ { uint64_t x62 = (x61 >> 0x17);
+ { uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
+ { uint64_t x64 = (x62 + x44);
+ { uint64_t x65 = (x64 >> 0x17);
+ { uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+ { uint64_t x67 = (x65 + x43);
+ { uint64_t x68 = (x67 >> 0x17);
+ { uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
+ { uint64_t x70 = (x68 + x42);
+ { uint64_t x71 = (x70 >> 0x17);
+ { uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
+ { uint64_t x73 = (x71 + x41);
+ { uint32_t x74 = (uint32_t) (x73 >> 0x17);
+ { uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
+ { uint64_t x76 = (x74 + x40);
+ { uint32_t x77 = (uint32_t) (x76 >> 0x17);
+ { uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
+ { uint64_t x79 = (x51 + ((uint64_t)0x1b * x77));
+ { uint32_t x80 = (uint32_t) (x79 >> 0x17);
+ { uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+ { uint32_t x82 = (x80 + x54);
+ { uint32_t x83 = (x82 >> 0x17);
+ { uint32_t x84 = (x82 & 0x7fffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e230m27/fesquare.c b/src/Specific/solinas32_2e230m27/fesquare.c
index edf1517cb..7be6c9a29 100644
--- a/src/Specific/solinas32_2e230m27/fesquare.c
+++ b/src/Specific/solinas32_2e230m27/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x19 = (((uint64_t)x2 * x17) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x17 * x2))))))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x1b * ((uint64_t)x17 * x17)));
-{ uint64_t x21 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x1b * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
-{ uint64_t x22 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x1b * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
-{ uint64_t x23 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x1b * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x1b * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x1b * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x1b * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1b * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
-{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x1b * (((uint64_t)x4 * x17) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((uint64_t)x17 * x4)))))))))));
-{ uint64_t x29 = (x28 >> 0x17);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
-{ uint64_t x31 = (x29 + x27);
-{ uint64_t x32 = (x31 >> 0x17);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
-{ uint64_t x34 = (x32 + x26);
-{ uint64_t x35 = (x34 >> 0x17);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7fffff);
-{ uint64_t x37 = (x35 + x25);
-{ uint64_t x38 = (x37 >> 0x17);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7fffff);
-{ uint64_t x40 = (x38 + x24);
-{ uint64_t x41 = (x40 >> 0x17);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7fffff);
-{ uint64_t x43 = (x41 + x23);
-{ uint64_t x44 = (x43 >> 0x17);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7fffff);
-{ uint64_t x46 = (x44 + x22);
-{ uint64_t x47 = (x46 >> 0x17);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7fffff);
-{ uint64_t x49 = (x47 + x21);
-{ uint64_t x50 = (x49 >> 0x17);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
-{ uint64_t x52 = (x50 + x20);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x17);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
-{ uint64_t x55 = (x53 + x19);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x17);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
-{ uint64_t x58 = (x30 + ((uint64_t)0x1b * x56));
-{ uint32_t x59 = (uint32_t) (x58 >> 0x17);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
-{ uint32_t x61 = (x59 + x33);
-{ uint32_t x62 = (x61 >> 0x17);
-{ uint32_t x63 = (x61 & 0x7fffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x19 = (((uint64_t)x2 * x17) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x17 * x2))))))))));
+ { uint64_t x20 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x1b * ((uint64_t)x17 * x17)));
+ { uint64_t x21 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x1b * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+ { uint64_t x22 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x1b * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+ { uint64_t x23 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x1b * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x1b * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x1b * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x1b * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1b * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+ { uint64_t x28 = (((uint64_t)x2 * x2) + (0x1b * (((uint64_t)x4 * x17) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((uint64_t)x17 * x4)))))))))));
+ { uint64_t x29 = (x28 >> 0x17);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
+ { uint64_t x31 = (x29 + x27);
+ { uint64_t x32 = (x31 >> 0x17);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
+ { uint64_t x34 = (x32 + x26);
+ { uint64_t x35 = (x34 >> 0x17);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7fffff);
+ { uint64_t x37 = (x35 + x25);
+ { uint64_t x38 = (x37 >> 0x17);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7fffff);
+ { uint64_t x40 = (x38 + x24);
+ { uint64_t x41 = (x40 >> 0x17);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7fffff);
+ { uint64_t x43 = (x41 + x23);
+ { uint64_t x44 = (x43 >> 0x17);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7fffff);
+ { uint64_t x46 = (x44 + x22);
+ { uint64_t x47 = (x46 >> 0x17);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7fffff);
+ { uint64_t x49 = (x47 + x21);
+ { uint64_t x50 = (x49 >> 0x17);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+ { uint64_t x52 = (x50 + x20);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x17);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+ { uint64_t x55 = (x53 + x19);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x17);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+ { uint64_t x58 = (x30 + ((uint64_t)0x1b * x56));
+ { uint32_t x59 = (uint32_t) (x58 >> 0x17);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+ { uint32_t x61 = (x59 + x33);
+ { uint32_t x62 = (x61 >> 0x17);
+ { uint32_t x63 = (x61 & 0x7fffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e230m27/freeze.c b/src/Specific/solinas32_2e230m27/freeze.c
index 36de528ff..0de32b862 100644
--- a/src/Specific/solinas32_2e230m27/freeze.c
+++ b/src/Specific/solinas32_2e230m27/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffe5;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffe5);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0x7fffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x7fffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0x7fffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x7fffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0x7fffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x7fffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0x7fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x7fffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0x7fffff);
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff);
+ { uint32_t x50 = (x49 & 0x7fffe5);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint32_t x54 = (x49 & 0x7fffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x7fffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0x7fffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x7fffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0x7fffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x7fffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0x7fffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x7fffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0x7fffff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e235m15/femul.c b/src/Specific/solinas32_2e235m15/femul.c
index d65148e6d..ba306b31b 100644
--- a/src/Specific/solinas32_2e235m15/femul.c
+++ b/src/Specific/solinas32_2e235m15/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + ((uint64_t)x19 * x21)))))))) + (0xf * ((uint64_t)x18 * x34)));
-{ uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0xf * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
-{ uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0xf * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
-{ uint64_t x40 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((uint64_t)x13 * x21))))) + (0xf * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0xf * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0xf * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
-{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0xf * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
-{ uint64_t x44 = (((uint64_t)x5 * x21) + (0xf * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
-{ uint64_t x45 = (x44 >> 0x1b);
-{ uint32_t x46 = ((uint32_t)x44 & 0x7ffffff);
-{ uint64_t x47 = (x45 + x43);
-{ uint64_t x48 = (x47 >> 0x1a);
-{ uint32_t x49 = ((uint32_t)x47 & 0x3ffffff);
-{ uint64_t x50 = (x48 + x42);
-{ uint64_t x51 = (x50 >> 0x1a);
-{ uint32_t x52 = ((uint32_t)x50 & 0x3ffffff);
-{ uint64_t x53 = (x51 + x41);
-{ uint64_t x54 = (x53 >> 0x1a);
-{ uint32_t x55 = ((uint32_t)x53 & 0x3ffffff);
-{ uint64_t x56 = (x54 + x40);
-{ uint64_t x57 = (x56 >> 0x1a);
-{ uint32_t x58 = ((uint32_t)x56 & 0x3ffffff);
-{ uint64_t x59 = (x57 + x39);
-{ uint64_t x60 = (x59 >> 0x1a);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3ffffff);
-{ uint64_t x62 = (x60 + x38);
-{ uint64_t x63 = (x62 >> 0x1a);
-{ uint32_t x64 = ((uint32_t)x62 & 0x3ffffff);
-{ uint64_t x65 = (x63 + x37);
-{ uint64_t x66 = (x65 >> 0x1a);
-{ uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
-{ uint64_t x68 = (x66 + x36);
-{ uint64_t x69 = (x68 >> 0x1a);
-{ uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
-{ uint64_t x71 = (x46 + (0xf * x69));
-{ uint32_t x72 = (uint32_t) (x71 >> 0x1b);
-{ uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
-{ uint32_t x74 = (x72 + x49);
-{ uint32_t x75 = (x74 >> 0x1a);
-{ uint32_t x76 = (x74 & 0x3ffffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) {
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));
+ { uint64_t x37 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + ((uint64_t)x19 * x21)))))))) + (0xf * ((uint64_t)x18 * x34)));
+ { uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0xf * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+ { uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0xf * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+ { uint64_t x40 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((uint64_t)x13 * x21))))) + (0xf * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0xf * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0xf * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+ { uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0xf * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+ { uint64_t x44 = (((uint64_t)x5 * x21) + (0xf * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
+ { uint64_t x45 = (x44 >> 0x1b);
+ { uint32_t x46 = ((uint32_t)x44 & 0x7ffffff);
+ { uint64_t x47 = (x45 + x43);
+ { uint64_t x48 = (x47 >> 0x1a);
+ { uint32_t x49 = ((uint32_t)x47 & 0x3ffffff);
+ { uint64_t x50 = (x48 + x42);
+ { uint64_t x51 = (x50 >> 0x1a);
+ { uint32_t x52 = ((uint32_t)x50 & 0x3ffffff);
+ { uint64_t x53 = (x51 + x41);
+ { uint64_t x54 = (x53 >> 0x1a);
+ { uint32_t x55 = ((uint32_t)x53 & 0x3ffffff);
+ { uint64_t x56 = (x54 + x40);
+ { uint64_t x57 = (x56 >> 0x1a);
+ { uint32_t x58 = ((uint32_t)x56 & 0x3ffffff);
+ { uint64_t x59 = (x57 + x39);
+ { uint64_t x60 = (x59 >> 0x1a);
+ { uint32_t x61 = ((uint32_t)x59 & 0x3ffffff);
+ { uint64_t x62 = (x60 + x38);
+ { uint64_t x63 = (x62 >> 0x1a);
+ { uint32_t x64 = ((uint32_t)x62 & 0x3ffffff);
+ { uint64_t x65 = (x63 + x37);
+ { uint64_t x66 = (x65 >> 0x1a);
+ { uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
+ { uint64_t x68 = (x66 + x36);
+ { uint64_t x69 = (x68 >> 0x1a);
+ { uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
+ { uint64_t x71 = (x46 + (0xf * x69));
+ { uint32_t x72 = (uint32_t) (x71 >> 0x1b);
+ { uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
+ { uint32_t x74 = (x72 + x49);
+ { uint32_t x75 = (x74 >> 0x1a);
+ { uint32_t x76 = (x74 & 0x3ffffff);
+ out[0] = x73;
+ out[1] = x76;
+ out[2] = (x75 + x52);
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e235m15/fesquare.c b/src/Specific/solinas32_2e235m15/fesquare.c
index 0e06fc098..950713e6b 100644
--- a/src/Specific/solinas32_2e235m15/fesquare.c
+++ b/src/Specific/solinas32_2e235m15/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0xf * ((uint64_t)x15 * x15)));
-{ uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xf * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
-{ uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xf * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
-{ uint64_t x21 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xf * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xf * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
-{ uint64_t x25 = (((uint64_t)x2 * x2) + (0xf * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
-{ uint64_t x26 = (x25 >> 0x1b);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
-{ uint64_t x28 = (x26 + x24);
-{ uint64_t x29 = (x28 >> 0x1a);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
-{ uint64_t x31 = (x29 + x23);
-{ uint64_t x32 = (x31 >> 0x1a);
-{ uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
-{ uint64_t x34 = (x32 + x22);
-{ uint64_t x35 = (x34 >> 0x1a);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x1a);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
-{ uint64_t x40 = (x38 + x20);
-{ uint64_t x41 = (x40 >> 0x1a);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
-{ uint64_t x43 = (x41 + x19);
-{ uint64_t x44 = (x43 >> 0x1a);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
-{ uint64_t x46 = (x44 + x18);
-{ uint64_t x47 = (x46 >> 0x1a);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
-{ uint64_t x49 = (x47 + x17);
-{ uint64_t x50 = (x49 >> 0x1a);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
-{ uint64_t x52 = (x27 + (0xf * x50));
-{ uint32_t x53 = (uint32_t) (x52 >> 0x1b);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
-{ uint32_t x55 = (x53 + x30);
-{ uint32_t x56 = (x55 >> 0x1a);
-{ uint32_t x57 = (x55 & 0x3ffffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
+ { uint64_t x18 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0xf * ((uint64_t)x15 * x15)));
+ { uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xf * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+ { uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xf * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+ { uint64_t x21 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xf * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+ { uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xf * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+ { uint64_t x25 = (((uint64_t)x2 * x2) + (0xf * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
+ { uint64_t x26 = (x25 >> 0x1b);
+ { uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
+ { uint64_t x28 = (x26 + x24);
+ { uint64_t x29 = (x28 >> 0x1a);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+ { uint64_t x31 = (x29 + x23);
+ { uint64_t x32 = (x31 >> 0x1a);
+ { uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
+ { uint64_t x34 = (x32 + x22);
+ { uint64_t x35 = (x34 >> 0x1a);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x1a);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
+ { uint64_t x40 = (x38 + x20);
+ { uint64_t x41 = (x40 >> 0x1a);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
+ { uint64_t x43 = (x41 + x19);
+ { uint64_t x44 = (x43 >> 0x1a);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
+ { uint64_t x46 = (x44 + x18);
+ { uint64_t x47 = (x46 >> 0x1a);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+ { uint64_t x49 = (x47 + x17);
+ { uint64_t x50 = (x49 >> 0x1a);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+ { uint64_t x52 = (x27 + (0xf * x50));
+ { uint32_t x53 = (uint32_t) (x52 >> 0x1b);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+ { uint32_t x55 = (x53 + x30);
+ { uint32_t x56 = (x55 >> 0x1a);
+ { uint32_t x57 = (x55 & 0x3ffffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33);
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e235m15/freeze.c b/src/Specific/solinas32_2e235m15/freeze.c
index 705f35e6e..200181eb7 100644
--- a/src/Specific/solinas32_2e235m15/freeze.c
+++ b/src/Specific/solinas32_2e235m15/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffff1;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffff1);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0x3ffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0x3ffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0x3ffffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0x3ffffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0x3ffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0x3ffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0x3ffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0x3ffffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff);
+ { uint32_t x45 = (x44 & 0x7fffff1);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45);
+ { uint32_t x49 = (x44 & 0x3ffffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0x3ffffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0x3ffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0x3ffffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0x3ffffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0x3ffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0x3ffffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0x3ffffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77);
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e243m9/femul.c b/src/Specific/solinas32_2e243m9/femul.c
index 883d95b41..c5e1a658b 100644
--- a/src/Specific/solinas32_2e243m9/femul.c
+++ b/src/Specific/solinas32_2e243m9/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21)))))))));
-{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x9 * ((uint64_t)x18 * x34)));
-{ uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x9 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
-{ uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x9 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
-{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x9 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x9 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
-{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x9 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
-{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x9 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
-{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x9 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));
-{ uint64_t x45 = (x44 >> 0x1b);
-{ uint32_t x46 = ((uint32_t)x44 & 0x7ffffff);
-{ uint64_t x47 = (x45 + x43);
-{ uint64_t x48 = (x47 >> 0x1b);
-{ uint32_t x49 = ((uint32_t)x47 & 0x7ffffff);
-{ uint64_t x50 = (x48 + x42);
-{ uint64_t x51 = (x50 >> 0x1b);
-{ uint32_t x52 = ((uint32_t)x50 & 0x7ffffff);
-{ uint64_t x53 = (x51 + x41);
-{ uint64_t x54 = (x53 >> 0x1b);
-{ uint32_t x55 = ((uint32_t)x53 & 0x7ffffff);
-{ uint64_t x56 = (x54 + x40);
-{ uint64_t x57 = (x56 >> 0x1b);
-{ uint32_t x58 = ((uint32_t)x56 & 0x7ffffff);
-{ uint64_t x59 = (x57 + x39);
-{ uint64_t x60 = (x59 >> 0x1b);
-{ uint32_t x61 = ((uint32_t)x59 & 0x7ffffff);
-{ uint64_t x62 = (x60 + x38);
-{ uint64_t x63 = (x62 >> 0x1b);
-{ uint32_t x64 = ((uint32_t)x62 & 0x7ffffff);
-{ uint64_t x65 = (x63 + x37);
-{ uint64_t x66 = (x65 >> 0x1b);
-{ uint32_t x67 = ((uint32_t)x65 & 0x7ffffff);
-{ uint64_t x68 = (x66 + x36);
-{ uint64_t x69 = (x68 >> 0x1b);
-{ uint32_t x70 = ((uint32_t)x68 & 0x7ffffff);
-{ uint64_t x71 = (x46 + (0x9 * x69));
-{ uint32_t x72 = (uint32_t) (x71 >> 0x1b);
-{ uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
-{ uint32_t x74 = (x72 + x49);
-{ uint32_t x75 = (x74 >> 0x1b);
-{ uint32_t x76 = (x74 & 0x7ffffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) {
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21)))))))));
+ { uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x9 * ((uint64_t)x18 * x34)));
+ { uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x9 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+ { uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x9 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+ { uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x9 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x9 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+ { uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x9 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+ { uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x9 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+ { uint64_t x44 = (((uint64_t)x5 * x21) + (0x9 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));
+ { uint64_t x45 = (x44 >> 0x1b);
+ { uint32_t x46 = ((uint32_t)x44 & 0x7ffffff);
+ { uint64_t x47 = (x45 + x43);
+ { uint64_t x48 = (x47 >> 0x1b);
+ { uint32_t x49 = ((uint32_t)x47 & 0x7ffffff);
+ { uint64_t x50 = (x48 + x42);
+ { uint64_t x51 = (x50 >> 0x1b);
+ { uint32_t x52 = ((uint32_t)x50 & 0x7ffffff);
+ { uint64_t x53 = (x51 + x41);
+ { uint64_t x54 = (x53 >> 0x1b);
+ { uint32_t x55 = ((uint32_t)x53 & 0x7ffffff);
+ { uint64_t x56 = (x54 + x40);
+ { uint64_t x57 = (x56 >> 0x1b);
+ { uint32_t x58 = ((uint32_t)x56 & 0x7ffffff);
+ { uint64_t x59 = (x57 + x39);
+ { uint64_t x60 = (x59 >> 0x1b);
+ { uint32_t x61 = ((uint32_t)x59 & 0x7ffffff);
+ { uint64_t x62 = (x60 + x38);
+ { uint64_t x63 = (x62 >> 0x1b);
+ { uint32_t x64 = ((uint32_t)x62 & 0x7ffffff);
+ { uint64_t x65 = (x63 + x37);
+ { uint64_t x66 = (x65 >> 0x1b);
+ { uint32_t x67 = ((uint32_t)x65 & 0x7ffffff);
+ { uint64_t x68 = (x66 + x36);
+ { uint64_t x69 = (x68 >> 0x1b);
+ { uint32_t x70 = ((uint32_t)x68 & 0x7ffffff);
+ { uint64_t x71 = (x46 + (0x9 * x69));
+ { uint32_t x72 = (uint32_t) (x71 >> 0x1b);
+ { uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
+ { uint32_t x74 = (x72 + x49);
+ { uint32_t x75 = (x74 >> 0x1b);
+ { uint32_t x76 = (x74 & 0x7ffffff);
+ out[0] = x73;
+ out[1] = x76;
+ out[2] = (x75 + x52);
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e243m9/fesquare.c b/src/Specific/solinas32_2e243m9/fesquare.c
index 48d5c095a..01abea524 100644
--- a/src/Specific/solinas32_2e243m9/fesquare.c
+++ b/src/Specific/solinas32_2e243m9/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2)))))))));
-{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x9 * ((uint64_t)x15 * x15)));
-{ uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
-{ uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
-{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
-{ uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
-{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x9 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
-{ uint64_t x26 = (x25 >> 0x1b);
-{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
-{ uint64_t x28 = (x26 + x24);
-{ uint64_t x29 = (x28 >> 0x1b);
-{ uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
-{ uint64_t x31 = (x29 + x23);
-{ uint64_t x32 = (x31 >> 0x1b);
-{ uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
-{ uint64_t x34 = (x32 + x22);
-{ uint64_t x35 = (x34 >> 0x1b);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x1b);
-{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
-{ uint64_t x40 = (x38 + x20);
-{ uint64_t x41 = (x40 >> 0x1b);
-{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
-{ uint64_t x43 = (x41 + x19);
-{ uint64_t x44 = (x43 >> 0x1b);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
-{ uint64_t x46 = (x44 + x18);
-{ uint64_t x47 = (x46 >> 0x1b);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
-{ uint64_t x49 = (x47 + x17);
-{ uint64_t x50 = (x49 >> 0x1b);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
-{ uint64_t x52 = (x27 + (0x9 * x50));
-{ uint32_t x53 = (uint32_t) (x52 >> 0x1b);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
-{ uint32_t x55 = (x53 + x30);
-{ uint32_t x56 = (x55 >> 0x1b);
-{ uint32_t x57 = (x55 & 0x7ffffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2)))))))));
+ { uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x9 * ((uint64_t)x15 * x15)));
+ { uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+ { uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+ { uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+ { uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+ { uint64_t x25 = (((uint64_t)x2 * x2) + (0x9 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
+ { uint64_t x26 = (x25 >> 0x1b);
+ { uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
+ { uint64_t x28 = (x26 + x24);
+ { uint64_t x29 = (x28 >> 0x1b);
+ { uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
+ { uint64_t x31 = (x29 + x23);
+ { uint64_t x32 = (x31 >> 0x1b);
+ { uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
+ { uint64_t x34 = (x32 + x22);
+ { uint64_t x35 = (x34 >> 0x1b);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x1b);
+ { uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+ { uint64_t x40 = (x38 + x20);
+ { uint64_t x41 = (x40 >> 0x1b);
+ { uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+ { uint64_t x43 = (x41 + x19);
+ { uint64_t x44 = (x43 >> 0x1b);
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
+ { uint64_t x46 = (x44 + x18);
+ { uint64_t x47 = (x46 >> 0x1b);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+ { uint64_t x49 = (x47 + x17);
+ { uint64_t x50 = (x49 >> 0x1b);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
+ { uint64_t x52 = (x27 + (0x9 * x50));
+ { uint32_t x53 = (uint32_t) (x52 >> 0x1b);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+ { uint32_t x55 = (x53 + x30);
+ { uint32_t x56 = (x55 >> 0x1b);
+ { uint32_t x57 = (x55 & 0x7ffffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33);
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e243m9/freeze.c b/src/Specific/solinas32_2e243m9/freeze.c
index 81cc1a512..f6d5f81b4 100644
--- a/src/Specific/solinas32_2e243m9/freeze.c
+++ b/src/Specific/solinas32_2e243m9/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffff7;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffff7);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0x7ffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0x7ffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0x7ffffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0x7ffffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0x7ffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0x7ffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0x7ffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0x7ffffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff);
+ { uint32_t x45 = (x44 & 0x7fffff7);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45);
+ { uint32_t x49 = (x44 & 0x7ffffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0x7ffffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0x7ffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0x7ffffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0x7ffffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0x7ffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0x7ffffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0x7ffffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 27 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77);
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e251m9/femul.c b/src/Specific/solinas32_2e251m9/femul.c
index f941d0c36..62632bb16 100644
--- a/src/Specific/solinas32_2e251m9/femul.c
+++ b/src/Specific/solinas32_2e251m9/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x9 * ((uint64_t)x20 * x38)));
-{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x9 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
-{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x9 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
-{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x9 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x9 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
-{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x9 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
-{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x9 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
-{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x9 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
-{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x9 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
-{ uint64_t x50 = (x49 >> 0x1a);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
-{ uint64_t x52 = (x50 + x48);
-{ uint64_t x53 = (x52 >> 0x19);
-{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
-{ uint64_t x55 = (x53 + x47);
-{ uint64_t x56 = (x55 >> 0x19);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
-{ uint64_t x58 = (x56 + x46);
-{ uint64_t x59 = (x58 >> 0x19);
-{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
-{ uint64_t x61 = (x59 + x45);
-{ uint64_t x62 = (x61 >> 0x19);
-{ uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
-{ uint64_t x64 = (x62 + x44);
-{ uint64_t x65 = (x64 >> 0x19);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
-{ uint64_t x67 = (x65 + x43);
-{ uint64_t x68 = (x67 >> 0x19);
-{ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
-{ uint64_t x70 = (x68 + x42);
-{ uint64_t x71 = (x70 >> 0x19);
-{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
-{ uint64_t x73 = (x71 + x41);
-{ uint64_t x74 = (x73 >> 0x19);
-{ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
-{ uint64_t x76 = (x74 + x40);
-{ uint64_t x77 = (x76 >> 0x19);
-{ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
-{ uint64_t x79 = (x51 + (0x9 * x77));
-{ uint32_t x80 = (uint32_t) (x79 >> 0x1a);
-{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
-{ uint32_t x82 = (x80 + x54);
-{ uint32_t x83 = (x82 >> 0x19);
-{ uint32_t x84 = (x82 & 0x1ffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) {
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9];
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));
+ { uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x9 * ((uint64_t)x20 * x38)));
+ { uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x9 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+ { uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x9 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+ { uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x9 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x9 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+ { uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x9 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+ { uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x9 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+ { uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x9 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+ { uint64_t x49 = (((uint64_t)x5 * x23) + (0x9 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
+ { uint64_t x50 = (x49 >> 0x1a);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+ { uint64_t x52 = (x50 + x48);
+ { uint64_t x53 = (x52 >> 0x19);
+ { uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+ { uint64_t x55 = (x53 + x47);
+ { uint64_t x56 = (x55 >> 0x19);
+ { uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+ { uint64_t x58 = (x56 + x46);
+ { uint64_t x59 = (x58 >> 0x19);
+ { uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+ { uint64_t x61 = (x59 + x45);
+ { uint64_t x62 = (x61 >> 0x19);
+ { uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
+ { uint64_t x64 = (x62 + x44);
+ { uint64_t x65 = (x64 >> 0x19);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+ { uint64_t x67 = (x65 + x43);
+ { uint64_t x68 = (x67 >> 0x19);
+ { uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+ { uint64_t x70 = (x68 + x42);
+ { uint64_t x71 = (x70 >> 0x19);
+ { uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+ { uint64_t x73 = (x71 + x41);
+ { uint64_t x74 = (x73 >> 0x19);
+ { uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+ { uint64_t x76 = (x74 + x40);
+ { uint64_t x77 = (x76 >> 0x19);
+ { uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+ { uint64_t x79 = (x51 + (0x9 * x77));
+ { uint32_t x80 = (uint32_t) (x79 >> 0x1a);
+ { uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+ { uint32_t x82 = (x80 + x54);
+ { uint32_t x83 = (x82 >> 0x19);
+ { uint32_t x84 = (x82 & 0x1ffffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e251m9/fesquare.c b/src/Specific/solinas32_2e251m9/fesquare.c
index aba6173ef..d21201afe 100644
--- a/src/Specific/solinas32_2e251m9/fesquare.c
+++ b/src/Specific/solinas32_2e251m9/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x9 * ((uint64_t)x17 * x17)));
-{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
-{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
-{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
-{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
-{ uint64_t x29 = (x28 >> 0x1a);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
-{ uint64_t x31 = (x29 + x27);
-{ uint64_t x32 = (x31 >> 0x19);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
-{ uint64_t x34 = (x32 + x26);
-{ uint64_t x35 = (x34 >> 0x19);
-{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
-{ uint64_t x37 = (x35 + x25);
-{ uint64_t x38 = (x37 >> 0x19);
-{ uint32_t x39 = ((uint32_t)x37 & 0x1ffffff);
-{ uint64_t x40 = (x38 + x24);
-{ uint64_t x41 = (x40 >> 0x19);
-{ uint32_t x42 = ((uint32_t)x40 & 0x1ffffff);
-{ uint64_t x43 = (x41 + x23);
-{ uint64_t x44 = (x43 >> 0x19);
-{ uint32_t x45 = ((uint32_t)x43 & 0x1ffffff);
-{ uint64_t x46 = (x44 + x22);
-{ uint64_t x47 = (x46 >> 0x19);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
-{ uint64_t x49 = (x47 + x21);
-{ uint64_t x50 = (x49 >> 0x19);
-{ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
-{ uint64_t x52 = (x50 + x20);
-{ uint64_t x53 = (x52 >> 0x19);
-{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
-{ uint64_t x55 = (x53 + x19);
-{ uint64_t x56 = (x55 >> 0x19);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
-{ uint64_t x58 = (x30 + (0x9 * x56));
-{ uint32_t x59 = (uint32_t) (x58 >> 0x1a);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
-{ uint32_t x61 = (x59 + x33);
-{ uint32_t x62 = (x61 >> 0x19);
-{ uint32_t x63 = (x61 & 0x1ffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint32_t out[10], const uint32_t in1[10]) { /* Squares the ten-limb value in1 and writes ten partially carried limbs to out. The 0x9 wrap factor and the 26 + 9*25 = 251 bits kept by the masks match the 2^251 - 9 modulus named by the directory. */
+ { const uint32_t x17 = in1[9]; /* input limbs are read from index 9 down to index 0 */
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2)))))))))); /* x19..x28 are the 64-bit column sums that end up in out[9] down to out[0]; the (uint64_t) cast widens each 32x32 product. NOTE(review): the 0x2 factors presumably compensate for limb 0 being one bit wider than the rest - confirm. */
+ { uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x9 * ((uint64_t)x17 * x17))); /* products whose input indices sum to 10 or more are folded into a lower column scaled by 0x9 */
+ { uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+ { uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+ { uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+ { uint64_t x28 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4)))))))))))); /* column for out[0]: every folded term here carries both the 0x2 and the 0x9 factor */
+ { uint64_t x29 = (x28 >> 0x1a); /* carry chain starts: bits above the low 26 of column 0 move up */
+ { uint32_t x30 = ((uint32_t)x28 & 0x3ffffff); /* limb 0 keeps 26 bits */
+ { uint64_t x31 = (x29 + x27);
+ { uint64_t x32 = (x31 >> 0x19); /* every later limb keeps 25 bits (shift 0x19, mask 0x1ffffff) */
+ { uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+ { uint64_t x34 = (x32 + x26);
+ { uint64_t x35 = (x34 >> 0x19);
+ { uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
+ { uint64_t x37 = (x35 + x25);
+ { uint64_t x38 = (x37 >> 0x19);
+ { uint32_t x39 = ((uint32_t)x37 & 0x1ffffff);
+ { uint64_t x40 = (x38 + x24);
+ { uint64_t x41 = (x40 >> 0x19);
+ { uint32_t x42 = ((uint32_t)x40 & 0x1ffffff);
+ { uint64_t x43 = (x41 + x23);
+ { uint64_t x44 = (x43 >> 0x19);
+ { uint32_t x45 = ((uint32_t)x43 & 0x1ffffff);
+ { uint64_t x46 = (x44 + x22);
+ { uint64_t x47 = (x46 >> 0x19);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
+ { uint64_t x49 = (x47 + x21);
+ { uint64_t x50 = (x49 >> 0x19);
+ { uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+ { uint64_t x52 = (x50 + x20);
+ { uint64_t x53 = (x52 >> 0x19);
+ { uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+ { uint64_t x55 = (x53 + x19);
+ { uint64_t x56 = (x55 >> 0x19); /* carry out of the top limb */
+ { uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+ { uint64_t x58 = (x30 + (0x9 * x56)); /* the top carry re-enters limb 0 multiplied by 0x9 */
+ { uint32_t x59 = (uint32_t) (x58 >> 0x1a); /* second, short carry pass: limb 0 into limb 1 */
+ { uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
+ { uint32_t x61 = (x59 + x33);
+ { uint32_t x62 = (x61 >> 0x19);
+ { uint32_t x63 = (x61 & 0x1ffffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36); /* carry out of limb 1 is added to limb 2 without re-masking */
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e251m9/freeze.c b/src/Specific/solinas32_2e251m9/freeze.c
index 3ad3614f2..d838290a1 100644
--- a/src/Specific/solinas32_2e251m9/freeze.c
+++ b/src/Specific/solinas32_2e251m9/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffff7;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) { /* NOTE(review): the "Op (Syntax....)" lines below are unexpanded code-generator notation, not C, so this file does not compile as written. The visible data flow is: subtract a per-limb constant with a borrow chain, build a mask from the final borrow, then add the masked constants back with a carry chain - presumably a conditional subtraction of the modulus; confirm against the generator. */
+ { const uint32_t x17 = in1[9]; /* input limbs are read from index 9 down to index 0 */
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffff7); /* limb 0 against 0x3fffff7 (= 2^26 - 9), borrow-in 0; x21 presumably receives the borrow-out */
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0x1ffffff); /* limbs 1..9 each use 0x1ffffff (= 2^25 - 1) and take the previous step's uint8_t result as first argument */
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x1ffffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0x1ffffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x1ffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0x1ffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x1ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0x1ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x1ffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0x1ffffff);
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff); /* cmovznz is defined outside this hunk; presumably yields 0xffffffff when the final borrow x48 is nonzero and 0x0 otherwise - verify */
+ { uint32_t x50 = (x49 & 0x3fffff7); /* x49 masks each constant to either itself or 0 before it is added back */
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50); /* add-back chain starts at limb 0 with carry-in 0 */
+ { uint32_t x54 = (x49 & 0x1ffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x1ffffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0x1ffffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x1ffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0x1ffffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x1ffffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0x1ffffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x1ffffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0x1ffffff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86); /* the last uint8_t result is bound to _ and never read */
+ out[0] = x52; /* outputs are the ten add-back results, limb 0 first */
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e254m127x2e240m1/freeze.c b/src/Specific/solinas32_2e254m127x2e240m1/freeze.c
index 34c2de5ab..995145d52 100644
--- a/src/Specific/solinas32_2e254m127x2e240m1/freeze.c
+++ b/src/Specific/solinas32_2e254m127x2e240m1/freeze.c
@@ -1,25 +1,59 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x22;
-out[1] = uint8_t x23 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff;;
+static void freeze(uint32_t out[11], const uint32_t in1[11]) { /* NOTE(review): the "Op (Syntax....)" lines below are unexpanded code-generator notation, not C, so this file does not compile as written. The visible data flow is: subtract a per-limb constant with a borrow chain, build a mask from the final borrow, then add the masked constants back with a carry chain - presumably a conditional subtraction of the modulus; confirm against the generator. */
+ { const uint32_t x19 = in1[10]; /* input limbs are read from index 10 down to index 0 */
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff); /* limb 0 against 0xffffff (= 2^24 - 1), borrow-in 0; x23 presumably receives the borrow-out */
+ { uint32_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x23, Return x4, 0x7fffff); /* limbs 1..9 each use 0x7fffff (= 2^23 - 1) and take the previous step's uint8_t result as first argument */
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x26, Return x6, 0x7fffff);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x8, 0x7fffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x10, 0x7fffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x12, 0x7fffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x14, 0x7fffff);
+ { uint32_t x43, uint8_t x44 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x16, 0x7fffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x44, Return x18, 0x7fffff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x20, 0x7fffff);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x19, 0x7f01ff); /* top limb uses 0x7f01ff (= 0x7fffff - 127 * 2^9), consistent with the 2^254 - 127*2^240 - 1 modulus named by the directory when limb 10 starts at bit 231 */
+ { uint32_t x54 = (uint32_t)cmovznz(x53, 0x0, 0xffffffff); /* cmovznz is defined outside this hunk; presumably yields 0xffffffff when the final borrow x53 is nonzero and 0x0 otherwise - verify */
+ { uint32_t x55 = (x54 & 0xffffff); /* x54 masks each constant to either itself or 0 before it is added back */
+ { uint32_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x22, Return x55); /* add-back chain starts at limb 0 with carry-in 0 */
+ { uint32_t x59 = (x54 & 0x7fffff);
+ { uint32_t x61, uint8_t x62 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x25, Return x59);
+ { uint32_t x63 = (x54 & 0x7fffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x62, Return x28, Return x63);
+ { uint32_t x67 = (x54 & 0x7fffff);
+ { uint32_t x69, uint8_t x70 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x31, Return x67);
+ { uint32_t x71 = (x54 & 0x7fffff);
+ { uint32_t x73, uint8_t x74 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x70, Return x34, Return x71);
+ { uint32_t x75 = (x54 & 0x7fffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x74, Return x37, Return x75);
+ { uint32_t x79 = (x54 & 0x7fffff);
+ { uint32_t x81, uint8_t x82 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x78, Return x40, Return x79);
+ { uint32_t x83 = (x54 & 0x7fffff);
+ { uint32_t x85, uint8_t x86 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x43, Return x83);
+ { uint32_t x87 = (x54 & 0x7fffff);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x86, Return x46, Return x87);
+ { uint32_t x91 = (x54 & 0x7fffff);
+ { uint32_t x93, uint8_t x94 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x49, Return x91);
+ { uint32_t x95 = (x54 & 0x7f01ff);
+ { uint32_t x97, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x94, Return x52, Return x95); /* the last uint8_t result is bound to _ and never read */
+ out[0] = x57; /* outputs are the eleven add-back results, limb 0 first */
+ out[1] = x61;
+ out[2] = x65;
+ out[3] = x69;
+ out[4] = x73;
+ out[5] = x77;
+ out[6] = x81;
+ out[7] = x85;
+ out[8] = x89;
+ out[9] = x93;
+ out[10] = x97;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e255m19/femul.c b/src/Specific/solinas32_2e255m19/femul.c
index d73360a88..74037da91 100644
--- a/src/Specific/solinas32_2e255m19/femul.c
+++ b/src/Specific/solinas32_2e255m19/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint64_t x40 = (((uint64_t)x5 * x38) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + (((uint64_t)x21 * x25) + ((uint64_t)x20 * x23))))))))));
-{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + ((0x2 * ((uint64_t)x11 * x33)) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x13 * (0x2 * ((uint64_t)x20 * x38))));
-{ uint64_t x42 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x19 * x23)))))))) + (0x13 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
-{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + (((uint64_t)x9 * x31) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x13 * ((0x2 * ((uint64_t)x19 * x38)) + (((uint64_t)x21 * x39) + (0x2 * ((uint64_t)x20 * x37))))));
-{ uint64_t x44 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((uint64_t)x15 * x23)))))) + (0x13 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
-{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x13 * ((0x2 * ((uint64_t)x15 * x38)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + (0x2 * ((uint64_t)x20 * x33))))))));
-{ uint64_t x46 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)))) + (0x13 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
-{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x13 * ((0x2 * ((uint64_t)x11 * x38)) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + (((uint64_t)x21 * x31) + (0x2 * ((uint64_t)x20 * x29))))))))));
-{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x13 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
-{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x13 * ((0x2 * ((uint64_t)x7 * x38)) + (((uint64_t)x9 * x39) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + (((uint64_t)x21 * x27) + (0x2 * ((uint64_t)x20 * x25))))))))))));
-{ uint64_t x50 = (x49 >> 0x1a);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
-{ uint64_t x52 = (x50 + x48);
-{ uint64_t x53 = (x52 >> 0x19);
-{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
-{ uint64_t x55 = (x53 + x47);
-{ uint64_t x56 = (x55 >> 0x1a);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3ffffff);
-{ uint64_t x58 = (x56 + x46);
-{ uint64_t x59 = (x58 >> 0x19);
-{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
-{ uint64_t x61 = (x59 + x45);
-{ uint64_t x62 = (x61 >> 0x1a);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3ffffff);
-{ uint64_t x64 = (x62 + x44);
-{ uint64_t x65 = (x64 >> 0x19);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
-{ uint64_t x67 = (x65 + x43);
-{ uint64_t x68 = (x67 >> 0x1a);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
-{ uint64_t x70 = (x68 + x42);
-{ uint64_t x71 = (x70 >> 0x19);
-{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
-{ uint64_t x73 = (x71 + x41);
-{ uint64_t x74 = (x73 >> 0x1a);
-{ uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
-{ uint64_t x76 = (x74 + x40);
-{ uint64_t x77 = (x76 >> 0x19);
-{ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
-{ uint64_t x79 = (x51 + (0x13 * x77));
-{ uint32_t x80 = (uint32_t) (x79 >> 0x1a);
-{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
-{ uint32_t x82 = (x80 + x54);
-{ uint32_t x83 = (x82 >> 0x19);
-{ uint32_t x84 = (x82 & 0x1ffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { /* Multiplies the ten-limb values in1 and in2 and writes ten partially carried limbs to out. The 0x13 (19) wrap factor and the alternating 26/25-bit masks (255 bits in total) match the 2^255 - 19 modulus named by the directory. */
+ { const uint32_t x20 = in1[9]; /* in1 limbs are read from index 9 down to index 0 */
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x38 = in2[9]; /* in2 limbs are read the same way */
+ { const uint32_t x39 = in2[8];
+ { const uint32_t x37 = in2[7];
+ { const uint32_t x35 = in2[6];
+ { const uint32_t x33 = in2[5];
+ { const uint32_t x31 = in2[4];
+ { const uint32_t x29 = in2[3];
+ { const uint32_t x27 = in2[2];
+ { const uint32_t x25 = in2[1];
+ { const uint32_t x23 = in2[0];
+ { uint64_t x40 = (((uint64_t)x5 * x38) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + (((uint64_t)x21 * x25) + ((uint64_t)x20 * x23)))))))))); /* x40..x49 are the 64-bit column sums that end up in out[9] down to out[0]; the (uint64_t) cast widens each 32x32 product */
+ { uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + ((0x2 * ((uint64_t)x11 * x33)) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x13 * (0x2 * ((uint64_t)x20 * x38)))); /* products whose limb indices sum to 10 or more are folded into a lower column scaled by 0x13; the 0x2 factor appears where both limb indices are odd */
+ { uint64_t x42 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x19 * x23)))))))) + (0x13 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+ { uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + (((uint64_t)x9 * x31) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x13 * ((0x2 * ((uint64_t)x19 * x38)) + (((uint64_t)x21 * x39) + (0x2 * ((uint64_t)x20 * x37))))));
+ { uint64_t x44 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((uint64_t)x15 * x23)))))) + (0x13 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+ { uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x13 * ((0x2 * ((uint64_t)x15 * x38)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + (0x2 * ((uint64_t)x20 * x33))))))));
+ { uint64_t x46 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)))) + (0x13 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+ { uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x13 * ((0x2 * ((uint64_t)x11 * x38)) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + (((uint64_t)x21 * x31) + (0x2 * ((uint64_t)x20 * x29))))))))));
+ { uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x13 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+ { uint64_t x49 = (((uint64_t)x5 * x23) + (0x13 * ((0x2 * ((uint64_t)x7 * x38)) + (((uint64_t)x9 * x39) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + (((uint64_t)x21 * x27) + (0x2 * ((uint64_t)x20 * x25)))))))))))); /* column for out[0] */
+ { uint64_t x50 = (x49 >> 0x1a); /* carry chain starts: even-numbered limbs keep 26 bits (shift 0x1a, mask 0x3ffffff) */
+ { uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+ { uint64_t x52 = (x50 + x48);
+ { uint64_t x53 = (x52 >> 0x19); /* odd-numbered limbs keep 25 bits (shift 0x19, mask 0x1ffffff) */
+ { uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+ { uint64_t x55 = (x53 + x47);
+ { uint64_t x56 = (x55 >> 0x1a);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3ffffff);
+ { uint64_t x58 = (x56 + x46);
+ { uint64_t x59 = (x58 >> 0x19);
+ { uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+ { uint64_t x61 = (x59 + x45);
+ { uint64_t x62 = (x61 >> 0x1a);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3ffffff);
+ { uint64_t x64 = (x62 + x44);
+ { uint64_t x65 = (x64 >> 0x19);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+ { uint64_t x67 = (x65 + x43);
+ { uint64_t x68 = (x67 >> 0x1a);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
+ { uint64_t x70 = (x68 + x42);
+ { uint64_t x71 = (x70 >> 0x19);
+ { uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+ { uint64_t x73 = (x71 + x41);
+ { uint64_t x74 = (x73 >> 0x1a);
+ { uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
+ { uint64_t x76 = (x74 + x40);
+ { uint64_t x77 = (x76 >> 0x19); /* carry out of the top limb */
+ { uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+ { uint64_t x79 = (x51 + (0x13 * x77)); /* the top carry re-enters limb 0 multiplied by 0x13 */
+ { uint32_t x80 = (uint32_t) (x79 >> 0x1a); /* second, short carry pass: limb 0 into limb 1 */
+ { uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+ { uint32_t x82 = (x80 + x54);
+ { uint32_t x83 = (x82 >> 0x19);
+ { uint32_t x84 = (x82 & 0x1ffffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57); /* carry out of limb 1 is added to limb 2 without re-masking */
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e255m19/fesquare.c b/src/Specific/solinas32_2e255m19/fesquare.c
index 95a421f6c..877264dce 100644
--- a/src/Specific/solinas32_2e255m19/fesquare.c
+++ b/src/Specific/solinas32_2e255m19/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x19 = (((uint64_t)x2 * x17) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x17 * x2))))))))));
-{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * (0x2 * ((uint64_t)x17 * x17))));
-{ uint64_t x21 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
-{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * ((0x2 * ((uint64_t)x16 * x17)) + (((uint64_t)x18 * x18) + (0x2 * ((uint64_t)x17 * x16))))));
-{ uint64_t x23 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * ((0x2 * ((uint64_t)x12 * x17)) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (0x2 * ((uint64_t)x17 * x12))))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * ((0x2 * ((uint64_t)x8 * x17)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + (0x2 * ((uint64_t)x17 * x8))))))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
-{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x17)) + (((uint64_t)x6 * x18) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + (((uint64_t)x18 * x6) + (0x2 * ((uint64_t)x17 * x4))))))))))));
-{ uint64_t x29 = (x28 >> 0x1a);
-{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
-{ uint64_t x31 = (x29 + x27);
-{ uint64_t x32 = (x31 >> 0x19);
-{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
-{ uint64_t x34 = (x32 + x26);
-{ uint64_t x35 = (x34 >> 0x1a);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
-{ uint64_t x37 = (x35 + x25);
-{ uint64_t x38 = (x37 >> 0x19);
-{ uint32_t x39 = ((uint32_t)x37 & 0x1ffffff);
-{ uint64_t x40 = (x38 + x24);
-{ uint64_t x41 = (x40 >> 0x1a);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
-{ uint64_t x43 = (x41 + x23);
-{ uint64_t x44 = (x43 >> 0x19);
-{ uint32_t x45 = ((uint32_t)x43 & 0x1ffffff);
-{ uint64_t x46 = (x44 + x22);
-{ uint64_t x47 = (x46 >> 0x1a);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
-{ uint64_t x49 = (x47 + x21);
-{ uint64_t x50 = (x49 >> 0x19);
-{ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
-{ uint64_t x52 = (x50 + x20);
-{ uint64_t x53 = (x52 >> 0x1a);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3ffffff);
-{ uint64_t x55 = (x53 + x19);
-{ uint64_t x56 = (x55 >> 0x19);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
-{ uint64_t x58 = (x30 + (0x13 * x56));
-{ uint32_t x59 = (uint32_t) (x58 >> 0x1a);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
-{ uint32_t x61 = (x59 + x33);
-{ uint32_t x62 = (x61 >> 0x19);
-{ uint32_t x63 = (x61 & 0x1ffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x19 = (((uint64_t)x2 * x17) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x17 * x2))))))))));
+ { uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * (0x2 * ((uint64_t)x17 * x17))));
+ { uint64_t x21 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+ { uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * ((0x2 * ((uint64_t)x16 * x17)) + (((uint64_t)x18 * x18) + (0x2 * ((uint64_t)x17 * x16))))));
+ { uint64_t x23 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * ((0x2 * ((uint64_t)x12 * x17)) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (0x2 * ((uint64_t)x17 * x12))))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * ((0x2 * ((uint64_t)x8 * x17)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + (0x2 * ((uint64_t)x17 * x8))))))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+ { uint64_t x28 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x17)) + (((uint64_t)x6 * x18) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + (((uint64_t)x18 * x6) + (0x2 * ((uint64_t)x17 * x4))))))))))));
+ { uint64_t x29 = (x28 >> 0x1a);
+ { uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+ { uint64_t x31 = (x29 + x27);
+ { uint64_t x32 = (x31 >> 0x19);
+ { uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+ { uint64_t x34 = (x32 + x26);
+ { uint64_t x35 = (x34 >> 0x1a);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
+ { uint64_t x37 = (x35 + x25);
+ { uint64_t x38 = (x37 >> 0x19);
+ { uint32_t x39 = ((uint32_t)x37 & 0x1ffffff);
+ { uint64_t x40 = (x38 + x24);
+ { uint64_t x41 = (x40 >> 0x1a);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
+ { uint64_t x43 = (x41 + x23);
+ { uint64_t x44 = (x43 >> 0x19);
+ { uint32_t x45 = ((uint32_t)x43 & 0x1ffffff);
+ { uint64_t x46 = (x44 + x22);
+ { uint64_t x47 = (x46 >> 0x1a);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+ { uint64_t x49 = (x47 + x21);
+ { uint64_t x50 = (x49 >> 0x19);
+ { uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+ { uint64_t x52 = (x50 + x20);
+ { uint64_t x53 = (x52 >> 0x1a);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3ffffff);
+ { uint64_t x55 = (x53 + x19);
+ { uint64_t x56 = (x55 >> 0x19);
+ { uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+ { uint64_t x58 = (x30 + (0x13 * x56));
+ { uint32_t x59 = (uint32_t) (x58 >> 0x1a);
+ { uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
+ { uint32_t x61 = (x59 + x33);
+ { uint32_t x62 = (x61 >> 0x19);
+ { uint32_t x63 = (x61 & 0x1ffffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e255m19/freeze.c b/src/Specific/solinas32_2e255m19/freeze.c
index 8ee39b49f..a8e8fdb17 100644
--- a/src/Specific/solinas32_2e255m19/freeze.c
+++ b/src/Specific/solinas32_2e255m19/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffed;;
+static void freeze(uint32_t out[10], const uint32_t in1[10]) {
+ { const uint32_t x17 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffed);
+ { uint32_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x21, Return x4, 0x1ffffff);
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x24, Return x6, 0x3ffffff);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x8, 0x1ffffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x10, 0x3ffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x12, 0x1ffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x14, 0x3ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x16, 0x1ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x18, 0x3ffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x17, 0x1ffffff);
+ { uint32_t x49 = (uint32_t)cmovznz(x48, 0x0, 0xffffffff);
+ { uint32_t x50 = (x49 & 0x3ffffed);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint32_t x54 = (x49 & 0x1ffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint32_t x58 = (x49 & 0x3ffffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint32_t x62 = (x49 & 0x1ffffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint32_t x66 = (x49 & 0x3ffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint32_t x70 = (x49 & 0x1ffffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint32_t x74 = (x49 & 0x3ffffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint32_t x78 = (x49 & 0x1ffffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint32_t x82 = (x49 & 0x3ffffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint32_t x86 = (x49 & 0x1ffffff);
+ { uint32_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x47, Return x86);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c b/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c
index 58eeb52fd..c7206a31e 100644
--- a/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c
@@ -1,84 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));
-{ ℤ x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) +ℤ (((uint64_t)x18 * x34) + ((0x2 * ((uint64_t)x18 * x34)) + (0x10 * ((uint64_t)x18 * x34)))));
-{ ℤ x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) +ℤ (((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35))) +ℤ ((0x2 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))))));
-{ ℤ x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) +ℤ ((((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33))) +ℤ ((0x2 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))) +ℤ (0x10 *ℤ (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))))));
-{ ℤ x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) +ℤ ((((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31)))) +ℤ ((0x2 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))) +ℤ (0x10 *ℤ (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))))));
-{ ℤ x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) +ℤ (((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29)))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))))));
-{ ℤ x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) +ℤ ((((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27)))))) +ℤ ((0x2 *ℤ (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))) +ℤ (0x10 *ℤ (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))))));
-{ ℤ x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) +ℤ ((((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25))))))) +ℤ ((0x2 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))) +ℤ (0x10 *ℤ (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))))));
-{ ℤ x44 = (((uint64_t)x5 * x21) +ℤ (((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23))))))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))))));
-{ uint64_t x45 = (x36 >> 0x1c);
-{ uint32_t x46 = ((uint32_t)x36 & 0xfffffff);
-{ uint64_t x47 = ((0x10000000 * x45) + x46);
-{ uint64_t x48 = (x47 >> 0x1c);
-{ uint32_t x49 = ((uint32_t)x47 & 0xfffffff);
-{ uint64_t x50 = ((0x10000000 * x48) + x49);
-{ uint64_t x51 = (x50 >> 0x1c);
-{ uint32_t x52 = ((uint32_t)x50 & 0xfffffff);
-{ uint64_t x53 = ((0x10000000 * x51) + x52);
-{ uint64_t x54 = (x53 >> 0x1c);
-{ uint32_t x55 = ((uint32_t)x53 & 0xfffffff);
-{ ℤ x56 = (x44 +ℤ (x54 + ((0x2 * x54) + (0x10 * x54))));
-{ uint64_t x57 = (x56 >> 0x1d);
-{ uint32_t x58 = (x56 & 0x1fffffff);
-{ ℤ x59 = (x57 +ℤ x43);
-{ uint64_t x60 = (x59 >> 0x1c);
-{ uint32_t x61 = (x59 & 0xfffffff);
-{ ℤ x62 = (x60 +ℤ x42);
-{ uint64_t x63 = (x62 >> 0x1c);
-{ uint32_t x64 = (x62 & 0xfffffff);
-{ ℤ x65 = (x63 +ℤ x41);
-{ uint64_t x66 = (x65 >> 0x1d);
-{ uint32_t x67 = (x65 & 0x1fffffff);
-{ ℤ x68 = (x66 +ℤ x40);
-{ uint64_t x69 = (x68 >> 0x1c);
-{ uint32_t x70 = (x68 & 0xfffffff);
-{ ℤ x71 = (x69 +ℤ x39);
-{ uint64_t x72 = (x71 >> 0x1c);
-{ uint32_t x73 = (x71 & 0xfffffff);
-{ ℤ x74 = (x72 +ℤ x38);
-{ uint64_t x75 = (x74 >> 0x1d);
-{ uint32_t x76 = (x74 & 0x1fffffff);
-{ ℤ x77 = (x75 +ℤ x37);
-{ uint64_t x78 = (x77 >> 0x1c);
-{ uint32_t x79 = (x77 & 0xfffffff);
-{ uint64_t x80 = (x78 + x55);
-{ uint32_t x81 = (uint32_t) (x80 >> 0x1c);
-{ uint32_t x82 = ((uint32_t)x80 & 0xfffffff);
-{ uint32_t x83 = (x58 + (x81 + ((0x2 * x81) + (0x10 * x81))));
-{ uint32_t x84 = (x83 >> 0x1d);
-{ uint32_t x85 = (x83 & 0x1fffffff);
-{ uint32_t x86 = (x85 >> 0x1d);
-{ uint32_t x87 = (x85 & 0x1fffffff);
-{ uint32_t x88 = (x87 >> 0x1d);
-{ uint32_t x89 = (x87 & 0x1fffffff);
-out[0] = x82;
-out[1] = x79;
-out[2] = x76;
-out[3] = x73;
-out[4] = x70;
-out[5] = x67;
-out[6] = x64;
-out[7] = x88 + x86 + x84 + x61;
-out[8] = x89;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint32_t out[9], const uint32_t in1[9], const uint32_t in2[9]) {
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x34 = in2[8];
+ { const uint32_t x35 = in2[7];
+ { const uint32_t x33 = in2[6];
+ { const uint32_t x31 = in2[5];
+ { const uint32_t x29 = in2[4];
+ { const uint32_t x27 = in2[3];
+ { const uint32_t x25 = in2[2];
+ { const uint32_t x23 = in2[1];
+ { const uint32_t x21 = in2[0];
+ { uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));
+ { ℤ x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) +ℤ (((uint64_t)x18 * x34) + ((0x2 * ((uint64_t)x18 * x34)) + (0x10 * ((uint64_t)x18 * x34)))));
+ { ℤ x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) +ℤ (((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35))) +ℤ ((0x2 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))))));
+ { ℤ x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) +ℤ ((((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33))) +ℤ ((0x2 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))) +ℤ (0x10 *ℤ (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))))));
+ { ℤ x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) +ℤ ((((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31)))) +ℤ ((0x2 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))) +ℤ (0x10 *ℤ (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))))));
+ { ℤ x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) +ℤ (((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29)))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))))));
+ { ℤ x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) +ℤ ((((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27)))))) +ℤ ((0x2 *ℤ (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))) +ℤ (0x10 *ℤ (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))))));
+ { ℤ x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) +ℤ ((((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25))))))) +ℤ ((0x2 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))) +ℤ (0x10 *ℤ (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))))));
+ { ℤ x44 = (((uint64_t)x5 * x21) +ℤ (((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23))))))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))))));
+ { uint64_t x45 = (x36 >> 0x1c);
+ { uint32_t x46 = ((uint32_t)x36 & 0xfffffff);
+ { uint64_t x47 = ((0x10000000 * x45) + x46);
+ { uint64_t x48 = (x47 >> 0x1c);
+ { uint32_t x49 = ((uint32_t)x47 & 0xfffffff);
+ { uint64_t x50 = ((0x10000000 * x48) + x49);
+ { uint64_t x51 = (x50 >> 0x1c);
+ { uint32_t x52 = ((uint32_t)x50 & 0xfffffff);
+ { uint64_t x53 = ((0x10000000 * x51) + x52);
+ { uint64_t x54 = (x53 >> 0x1c);
+ { uint32_t x55 = ((uint32_t)x53 & 0xfffffff);
+ { ℤ x56 = (x44 +ℤ (x54 + ((0x2 * x54) + (0x10 * x54))));
+ { uint64_t x57 = (x56 >> 0x1d);
+ { uint32_t x58 = (x56 & 0x1fffffff);
+ { ℤ x59 = (x57 +ℤ x43);
+ { uint64_t x60 = (x59 >> 0x1c);
+ { uint32_t x61 = (x59 & 0xfffffff);
+ { ℤ x62 = (x60 +ℤ x42);
+ { uint64_t x63 = (x62 >> 0x1c);
+ { uint32_t x64 = (x62 & 0xfffffff);
+ { ℤ x65 = (x63 +ℤ x41);
+ { uint64_t x66 = (x65 >> 0x1d);
+ { uint32_t x67 = (x65 & 0x1fffffff);
+ { ℤ x68 = (x66 +ℤ x40);
+ { uint64_t x69 = (x68 >> 0x1c);
+ { uint32_t x70 = (x68 & 0xfffffff);
+ { ℤ x71 = (x69 +ℤ x39);
+ { uint64_t x72 = (x71 >> 0x1c);
+ { uint32_t x73 = (x71 & 0xfffffff);
+ { ℤ x74 = (x72 +ℤ x38);
+ { uint64_t x75 = (x74 >> 0x1d);
+ { uint32_t x76 = (x74 & 0x1fffffff);
+ { ℤ x77 = (x75 +ℤ x37);
+ { uint64_t x78 = (x77 >> 0x1c);
+ { uint32_t x79 = (x77 & 0xfffffff);
+ { uint64_t x80 = (x78 + x55);
+ { uint32_t x81 = (uint32_t) (x80 >> 0x1c);
+ { uint32_t x82 = ((uint32_t)x80 & 0xfffffff);
+ { uint32_t x83 = (x58 + (x81 + ((0x2 * x81) + (0x10 * x81))));
+ { uint32_t x84 = (x83 >> 0x1d);
+ { uint32_t x85 = (x83 & 0x1fffffff);
+ { uint32_t x86 = (x85 >> 0x1d);
+ { uint32_t x87 = (x85 & 0x1fffffff);
+ { uint32_t x88 = (x87 >> 0x1d);
+ { uint32_t x89 = (x87 & 0x1fffffff);
+ out[0] = x89;
+ out[1] = (x88 + (x86 + (x84 + x61)));
+ out[2] = x64;
+ out[3] = x67;
+ out[4] = x70;
+ out[5] = x73;
+ out[6] = x76;
+ out[7] = x79;
+ out[8] = x82;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c b/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c
index 6c2274f91..dda248efc 100644
--- a/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c
@@ -1,84 +1,75 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
-{ ℤ x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x15 * x15) + ((0x2 * ((uint64_t)x15 * x15)) + (0x10 * ((uint64_t)x15 * x15)))));
-{ ℤ x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16))) +ℤ ((0x2 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))))));
-{ ℤ x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) +ℤ ((((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14))) +ℤ ((0x2 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))) +ℤ (0x10 *ℤ (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))))));
-{ ℤ x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ ((((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12)))) +ℤ ((0x2 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))) +ℤ (0x10 *ℤ (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))))));
-{ ℤ x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10)))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))))));
-{ ℤ x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ ((((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8)))))) +ℤ ((0x2 *ℤ (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))) +ℤ (0x10 *ℤ (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))))));
-{ ℤ x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ ((((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6))))))) +ℤ ((0x2 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))) +ℤ (0x10 *ℤ (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))))));
-{ ℤ x25 = (((uint64_t)x2 * x2) +ℤ (((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4))))))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))))));
-{ uint64_t x26 = (x17 >> 0x1c);
-{ uint32_t x27 = ((uint32_t)x17 & 0xfffffff);
-{ uint64_t x28 = ((0x10000000 * x26) + x27);
-{ uint64_t x29 = (x28 >> 0x1c);
-{ uint32_t x30 = ((uint32_t)x28 & 0xfffffff);
-{ uint64_t x31 = ((0x10000000 * x29) + x30);
-{ uint64_t x32 = (x31 >> 0x1c);
-{ uint32_t x33 = ((uint32_t)x31 & 0xfffffff);
-{ uint64_t x34 = ((0x10000000 * x32) + x33);
-{ uint64_t x35 = (x34 >> 0x1c);
-{ uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
-{ ℤ x37 = (x25 +ℤ (x35 + ((0x2 * x35) + (0x10 * x35))));
-{ uint64_t x38 = (x37 >> 0x1d);
-{ uint32_t x39 = (x37 & 0x1fffffff);
-{ ℤ x40 = (x38 +ℤ x24);
-{ uint64_t x41 = (x40 >> 0x1c);
-{ uint32_t x42 = (x40 & 0xfffffff);
-{ ℤ x43 = (x41 +ℤ x23);
-{ uint64_t x44 = (x43 >> 0x1c);
-{ uint32_t x45 = (x43 & 0xfffffff);
-{ ℤ x46 = (x44 +ℤ x22);
-{ uint64_t x47 = (x46 >> 0x1d);
-{ uint32_t x48 = (x46 & 0x1fffffff);
-{ ℤ x49 = (x47 +ℤ x21);
-{ uint64_t x50 = (x49 >> 0x1c);
-{ uint32_t x51 = (x49 & 0xfffffff);
-{ ℤ x52 = (x50 +ℤ x20);
-{ uint64_t x53 = (x52 >> 0x1c);
-{ uint32_t x54 = (x52 & 0xfffffff);
-{ ℤ x55 = (x53 +ℤ x19);
-{ uint64_t x56 = (x55 >> 0x1d);
-{ uint32_t x57 = (x55 & 0x1fffffff);
-{ ℤ x58 = (x56 +ℤ x18);
-{ uint64_t x59 = (x58 >> 0x1c);
-{ uint32_t x60 = (x58 & 0xfffffff);
-{ uint64_t x61 = (x59 + x36);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x1c);
-{ uint32_t x63 = ((uint32_t)x61 & 0xfffffff);
-{ uint32_t x64 = (x39 + (x62 + ((0x2 * x62) + (0x10 * x62))));
-{ uint32_t x65 = (x64 >> 0x1d);
-{ uint32_t x66 = (x64 & 0x1fffffff);
-{ uint32_t x67 = (x66 >> 0x1d);
-{ uint32_t x68 = (x66 & 0x1fffffff);
-{ uint32_t x69 = (x68 >> 0x1d);
-{ uint32_t x70 = (x68 & 0x1fffffff);
-out[0] = x63;
-out[1] = x60;
-out[2] = x57;
-out[3] = x54;
-out[4] = x51;
-out[5] = x48;
-out[6] = x45;
-out[7] = x69 + x67 + x65 + x42;
-out[8] = x70;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2)))))))));
+ { ℤ x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x15 * x15) + ((0x2 * ((uint64_t)x15 * x15)) + (0x10 * ((uint64_t)x15 * x15)))));
+ { ℤ x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16))) +ℤ ((0x2 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))))));
+ { ℤ x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) +ℤ ((((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14))) +ℤ ((0x2 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))) +ℤ (0x10 *ℤ (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))))));
+ { ℤ x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ ((((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12)))) +ℤ ((0x2 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))) +ℤ (0x10 *ℤ (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))))));
+ { ℤ x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10)))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))))));
+ { ℤ x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ ((((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8)))))) +ℤ ((0x2 *ℤ (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))) +ℤ (0x10 *ℤ (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))))));
+ { ℤ x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ ((((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6))))))) +ℤ ((0x2 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))) +ℤ (0x10 *ℤ (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))))));
+ { ℤ x25 = (((uint64_t)x2 * x2) +ℤ (((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4))))))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))))));
+ { uint64_t x26 = (x17 >> 0x1c);
+ { uint32_t x27 = ((uint32_t)x17 & 0xfffffff);
+ { uint64_t x28 = ((0x10000000 * x26) + x27);
+ { uint64_t x29 = (x28 >> 0x1c);
+ { uint32_t x30 = ((uint32_t)x28 & 0xfffffff);
+ { uint64_t x31 = ((0x10000000 * x29) + x30);
+ { uint64_t x32 = (x31 >> 0x1c);
+ { uint32_t x33 = ((uint32_t)x31 & 0xfffffff);
+ { uint64_t x34 = ((0x10000000 * x32) + x33);
+ { uint64_t x35 = (x34 >> 0x1c);
+ { uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
+ { ℤ x37 = (x25 +ℤ (x35 + ((0x2 * x35) + (0x10 * x35))));
+ { uint64_t x38 = (x37 >> 0x1d);
+ { uint32_t x39 = (x37 & 0x1fffffff);
+ { ℤ x40 = (x38 +ℤ x24);
+ { uint64_t x41 = (x40 >> 0x1c);
+ { uint32_t x42 = (x40 & 0xfffffff);
+ { ℤ x43 = (x41 +ℤ x23);
+ { uint64_t x44 = (x43 >> 0x1c);
+ { uint32_t x45 = (x43 & 0xfffffff);
+ { ℤ x46 = (x44 +ℤ x22);
+ { uint64_t x47 = (x46 >> 0x1d);
+ { uint32_t x48 = (x46 & 0x1fffffff);
+ { ℤ x49 = (x47 +ℤ x21);
+ { uint64_t x50 = (x49 >> 0x1c);
+ { uint32_t x51 = (x49 & 0xfffffff);
+ { ℤ x52 = (x50 +ℤ x20);
+ { uint64_t x53 = (x52 >> 0x1c);
+ { uint32_t x54 = (x52 & 0xfffffff);
+ { ℤ x55 = (x53 +ℤ x19);
+ { uint64_t x56 = (x55 >> 0x1d);
+ { uint32_t x57 = (x55 & 0x1fffffff);
+ { ℤ x58 = (x56 +ℤ x18);
+ { uint64_t x59 = (x58 >> 0x1c);
+ { uint32_t x60 = (x58 & 0xfffffff);
+ { uint64_t x61 = (x59 + x36);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x1c);
+ { uint32_t x63 = ((uint32_t)x61 & 0xfffffff);
+ { uint32_t x64 = (x39 + (x62 + ((0x2 * x62) + (0x10 * x62))));
+ { uint32_t x65 = (x64 >> 0x1d);
+ { uint32_t x66 = (x64 & 0x1fffffff);
+ { uint32_t x67 = (x66 >> 0x1d);
+ { uint32_t x68 = (x66 & 0x1fffffff);
+ { uint32_t x69 = (x68 >> 0x1d);
+ { uint32_t x70 = (x68 & 0x1fffffff);
+ out[0] = x70;
+ out[1] = (x69 + (x67 + (x65 + x42)));
+ out[2] = x45;
+ out[3] = x48;
+ out[4] = x51;
+ out[5] = x54;
+ out[6] = x57;
+ out[7] = x60;
+ out[8] = x63;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c b/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c
index 1e8658d1b..92a207745 100644
--- a/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffed;;
+static void freeze(uint32_t out[9], const uint32_t in1[9]) {
+ { const uint32_t x15 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffed);
+ { uint32_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x19, Return x4, 0xfffffff);
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x22, Return x6, 0xfffffff);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x8, 0x1fffffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x10, 0xfffffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x12, 0xfffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x14, 0x1fffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x16, 0xfffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x15, 0xfffffff);
+ { uint32_t x44 = (uint32_t)cmovznz(x43, 0x0, 0xffffffff);
+ { uint32_t x45 = (x44 & 0x1fffffed);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x18, Return x45);
+ { uint32_t x49 = (x44 & 0xfffffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint32_t x53 = (x44 & 0xfffffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint32_t x57 = (x44 & 0x1fffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint32_t x61 = (x44 & 0xfffffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint32_t x65 = (x44 & 0xfffffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint32_t x69 = (x44 & 0x1fffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint32_t x73 = (x44 & 0xfffffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint32_t x77 = (x44 & 0xfffffff);
+ { uint32_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x42, Return x77);
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e255m765/femul.c b/src/Specific/solinas32_2e255m765/femul.c
index c302e2221..6b918503d 100644
--- a/src/Specific/solinas32_2e255m765/femul.c
+++ b/src/Specific/solinas32_2e255m765/femul.c
@@ -1,86 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x2fd * ((uint64_t)x24 * x46)));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x2fd * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x2fd * ((0x2 * ((uint64_t)x23 * x46)) + ((0x2 * ((uint64_t)x25 * x47)) + (0x2 * ((uint64_t)x24 * x45))))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x19 * x27)))))))) + (0x2fd * (((uint64_t)x21 * x46) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((uint64_t)x24 * x43))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x2fd * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
-{ uint64_t x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) + (0x2fd * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
-{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x2fd * ((0x2 * ((uint64_t)x15 * x46)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + (0x2 * ((uint64_t)x24 * x37))))))))));
-{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x2fd * (((uint64_t)x13 * x46) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x24 * x35))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x2fd * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x2fd * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
-{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x2fd * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
-{ uint64_t x60 = (x59 >> 0x16);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
-{ uint64_t x62 = (x60 + x58);
-{ uint64_t x63 = (x62 >> 0x15);
-{ uint32_t x64 = ((uint32_t)x62 & 0x1fffff);
-{ uint64_t x65 = (x63 + x57);
-{ uint64_t x66 = (x65 >> 0x15);
-{ uint32_t x67 = ((uint32_t)x65 & 0x1fffff);
-{ uint64_t x68 = (x66 + x56);
-{ uint64_t x69 = (x68 >> 0x15);
-{ uint32_t x70 = ((uint32_t)x68 & 0x1fffff);
-{ uint64_t x71 = (x69 + x55);
-{ uint64_t x72 = (x71 >> 0x16);
-{ uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
-{ uint64_t x74 = (x72 + x54);
-{ uint64_t x75 = (x74 >> 0x15);
-{ uint32_t x76 = ((uint32_t)x74 & 0x1fffff);
-{ uint64_t x77 = (x75 + x53);
-{ uint64_t x78 = (x77 >> 0x15);
-{ uint32_t x79 = ((uint32_t)x77 & 0x1fffff);
-{ uint64_t x80 = (x78 + x52);
-{ uint64_t x81 = (x80 >> 0x15);
-{ uint32_t x82 = ((uint32_t)x80 & 0x1fffff);
-{ uint64_t x83 = (x81 + x51);
-{ uint64_t x84 = (x83 >> 0x16);
-{ uint32_t x85 = ((uint32_t)x83 & 0x3fffff);
-{ uint64_t x86 = (x84 + x50);
-{ uint64_t x87 = (x86 >> 0x15);
-{ uint32_t x88 = ((uint32_t)x86 & 0x1fffff);
-{ uint64_t x89 = (x87 + x49);
-{ uint64_t x90 = (x89 >> 0x15);
-{ uint32_t x91 = ((uint32_t)x89 & 0x1fffff);
-{ uint64_t x92 = (x90 + x48);
-{ uint32_t x93 = (uint32_t) (x92 >> 0x15);
-{ uint32_t x94 = ((uint32_t)x92 & 0x1fffff);
-{ uint64_t x95 = (x61 + ((uint64_t)0x2fd * x93));
-{ uint32_t x96 = (uint32_t) (x95 >> 0x16);
-{ uint32_t x97 = ((uint32_t)x95 & 0x3fffff);
-{ uint32_t x98 = (x96 + x64);
-{ uint32_t x99 = (x98 >> 0x15);
-{ uint32_t x100 = (x98 & 0x1fffff);
-out[0] = x94;
-out[1] = x91;
-out[2] = x88;
-out[3] = x85;
-out[4] = x82;
-out[5] = x79;
-out[6] = x76;
-out[7] = x73;
-out[8] = x70;
-out[9] = x99 + x67;
-out[10] = x100;
-out[11] = x97;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x2fd * ((uint64_t)x24 * x46)));
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x2fd * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x2fd * ((0x2 * ((uint64_t)x23 * x46)) + ((0x2 * ((uint64_t)x25 * x47)) + (0x2 * ((uint64_t)x24 * x45))))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x19 * x27)))))))) + (0x2fd * (((uint64_t)x21 * x46) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((uint64_t)x24 * x43))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x2fd * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
+ { uint64_t x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) + (0x2fd * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
+ { uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x2fd * ((0x2 * ((uint64_t)x15 * x46)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + (0x2 * ((uint64_t)x24 * x37))))))))));
+ { uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x2fd * (((uint64_t)x13 * x46) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x24 * x35))))))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x2fd * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x2fd * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+ { uint64_t x59 = (((uint64_t)x5 * x27) + (0x2fd * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
+ { uint64_t x60 = (x59 >> 0x16);
+ { uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
+ { uint64_t x62 = (x60 + x58);
+ { uint64_t x63 = (x62 >> 0x15);
+ { uint32_t x64 = ((uint32_t)x62 & 0x1fffff);
+ { uint64_t x65 = (x63 + x57);
+ { uint64_t x66 = (x65 >> 0x15);
+ { uint32_t x67 = ((uint32_t)x65 & 0x1fffff);
+ { uint64_t x68 = (x66 + x56);
+ { uint64_t x69 = (x68 >> 0x15);
+ { uint32_t x70 = ((uint32_t)x68 & 0x1fffff);
+ { uint64_t x71 = (x69 + x55);
+ { uint64_t x72 = (x71 >> 0x16);
+ { uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
+ { uint64_t x74 = (x72 + x54);
+ { uint64_t x75 = (x74 >> 0x15);
+ { uint32_t x76 = ((uint32_t)x74 & 0x1fffff);
+ { uint64_t x77 = (x75 + x53);
+ { uint64_t x78 = (x77 >> 0x15);
+ { uint32_t x79 = ((uint32_t)x77 & 0x1fffff);
+ { uint64_t x80 = (x78 + x52);
+ { uint64_t x81 = (x80 >> 0x15);
+ { uint32_t x82 = ((uint32_t)x80 & 0x1fffff);
+ { uint64_t x83 = (x81 + x51);
+ { uint64_t x84 = (x83 >> 0x16);
+ { uint32_t x85 = ((uint32_t)x83 & 0x3fffff);
+ { uint64_t x86 = (x84 + x50);
+ { uint64_t x87 = (x86 >> 0x15);
+ { uint32_t x88 = ((uint32_t)x86 & 0x1fffff);
+ { uint64_t x89 = (x87 + x49);
+ { uint64_t x90 = (x89 >> 0x15);
+ { uint32_t x91 = ((uint32_t)x89 & 0x1fffff);
+ { uint64_t x92 = (x90 + x48);
+ { uint32_t x93 = (uint32_t) (x92 >> 0x15);
+ { uint32_t x94 = ((uint32_t)x92 & 0x1fffff);
+ { uint64_t x95 = (x61 + ((uint64_t)0x2fd * x93));
+ { uint32_t x96 = (uint32_t) (x95 >> 0x16);
+ { uint32_t x97 = ((uint32_t)x95 & 0x3fffff);
+ { uint32_t x98 = (x96 + x64);
+ { uint32_t x99 = (x98 >> 0x15);
+ { uint32_t x100 = (x98 & 0x1fffff);
+ out[0] = x97;
+ out[1] = x100;
+ out[2] = (x99 + x67);
+ out[3] = x70;
+ out[4] = x73;
+ out[5] = x76;
+ out[6] = x79;
+ out[7] = x82;
+ out[8] = x85;
+ out[9] = x88;
+ out[10] = x91;
+ out[11] = x94;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e255m765/fesquare.c b/src/Specific/solinas32_2e255m765/fesquare.c
index a4ae3fa15..c9476e253 100644
--- a/src/Specific/solinas32_2e255m765/fesquare.c
+++ b/src/Specific/solinas32_2e255m765/fesquare.c
@@ -1,86 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x2fd * ((uint64_t)x21 * x21)));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x2fd * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x2fd * ((0x2 * ((uint64_t)x20 * x21)) + ((0x2 * ((uint64_t)x22 * x22)) + (0x2 * ((uint64_t)x21 * x20))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x2fd * (((uint64_t)x18 * x21) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + ((uint64_t)x21 * x18))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x2fd * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x2fd * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x2fd * ((0x2 * ((uint64_t)x12 * x21)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + (0x2 * ((uint64_t)x21 * x12))))))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x2fd * (((uint64_t)x10 * x21) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((uint64_t)x21 * x10))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x2fd * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x2fd * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
-{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x2fd * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
-{ uint64_t x35 = (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint64_t x37 = (x35 + x33);
-{ uint64_t x38 = (x37 >> 0x15);
-{ uint32_t x39 = ((uint32_t)x37 & 0x1fffff);
-{ uint64_t x40 = (x38 + x32);
-{ uint64_t x41 = (x40 >> 0x15);
-{ uint32_t x42 = ((uint32_t)x40 & 0x1fffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint64_t x44 = (x43 >> 0x15);
-{ uint32_t x45 = ((uint32_t)x43 & 0x1fffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint64_t x47 = (x46 >> 0x16);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint64_t x50 = (x49 >> 0x15);
-{ uint32_t x51 = ((uint32_t)x49 & 0x1fffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x15);
-{ uint32_t x54 = ((uint32_t)x52 & 0x1fffff);
-{ uint64_t x55 = (x53 + x27);
-{ uint64_t x56 = (x55 >> 0x15);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1fffff);
-{ uint64_t x58 = (x56 + x26);
-{ uint64_t x59 = (x58 >> 0x16);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
-{ uint64_t x61 = (x59 + x25);
-{ uint64_t x62 = (x61 >> 0x15);
-{ uint32_t x63 = ((uint32_t)x61 & 0x1fffff);
-{ uint64_t x64 = (x62 + x24);
-{ uint64_t x65 = (x64 >> 0x15);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
-{ uint64_t x67 = (x65 + x23);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x15);
-{ uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
-{ uint64_t x70 = (x36 + ((uint64_t)0x2fd * x68));
-{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
-{ uint32_t x73 = (x71 + x39);
-{ uint32_t x74 = (x73 >> 0x15);
-{ uint32_t x75 = (x73 & 0x1fffff);
-out[0] = x69;
-out[1] = x66;
-out[2] = x63;
-out[3] = x60;
-out[4] = x57;
-out[5] = x54;
-out[6] = x51;
-out[7] = x48;
-out[8] = x45;
-out[9] = x74 + x42;
-out[10] = x75;
-out[11] = x72;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x2fd * ((uint64_t)x21 * x21)));
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x2fd * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x2fd * ((0x2 * ((uint64_t)x20 * x21)) + ((0x2 * ((uint64_t)x22 * x22)) + (0x2 * ((uint64_t)x21 * x20))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x2fd * (((uint64_t)x18 * x21) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + ((uint64_t)x21 * x18))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x2fd * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x2fd * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x2fd * ((0x2 * ((uint64_t)x12 * x21)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + (0x2 * ((uint64_t)x21 * x12))))))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x2fd * (((uint64_t)x10 * x21) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((uint64_t)x21 * x10))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x2fd * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x2fd * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+ { uint64_t x34 = (((uint64_t)x2 * x2) + (0x2fd * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
+ { uint64_t x35 = (x34 >> 0x16);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+ { uint64_t x37 = (x35 + x33);
+ { uint64_t x38 = (x37 >> 0x15);
+ { uint32_t x39 = ((uint32_t)x37 & 0x1fffff);
+ { uint64_t x40 = (x38 + x32);
+ { uint64_t x41 = (x40 >> 0x15);
+ { uint32_t x42 = ((uint32_t)x40 & 0x1fffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint64_t x44 = (x43 >> 0x15);
+ { uint32_t x45 = ((uint32_t)x43 & 0x1fffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint64_t x47 = (x46 >> 0x16);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint64_t x50 = (x49 >> 0x15);
+ { uint32_t x51 = ((uint32_t)x49 & 0x1fffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x15);
+ { uint32_t x54 = ((uint32_t)x52 & 0x1fffff);
+ { uint64_t x55 = (x53 + x27);
+ { uint64_t x56 = (x55 >> 0x15);
+ { uint32_t x57 = ((uint32_t)x55 & 0x1fffff);
+ { uint64_t x58 = (x56 + x26);
+ { uint64_t x59 = (x58 >> 0x16);
+ { uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+ { uint64_t x61 = (x59 + x25);
+ { uint64_t x62 = (x61 >> 0x15);
+ { uint32_t x63 = ((uint32_t)x61 & 0x1fffff);
+ { uint64_t x64 = (x62 + x24);
+ { uint64_t x65 = (x64 >> 0x15);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
+ { uint64_t x67 = (x65 + x23);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x15);
+ { uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
+ { uint64_t x70 = (x36 + ((uint64_t)0x2fd * x68));
+ { uint32_t x71 = (uint32_t) (x70 >> 0x16);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+ { uint32_t x73 = (x71 + x39);
+ { uint32_t x74 = (x73 >> 0x15);
+ { uint32_t x75 = (x73 & 0x1fffff);
+ out[0] = x72;
+ out[1] = x75;
+ out[2] = (x74 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ out[7] = x57;
+ out[8] = x60;
+ out[9] = x63;
+ out[10] = x66;
+ out[11] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e255m765/freeze.c b/src/Specific/solinas32_2e255m765/freeze.c
index 4fb73d4cc..998e47a88 100644
--- a/src/Specific/solinas32_2e255m765/freeze.c
+++ b/src/Specific/solinas32_2e255m765/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffd03;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffd03);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x1fffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x1fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x1fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x3fffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0x1fffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0x1fffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0x1fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x3fffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0x1fffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x1fffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x1fffff);
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff);
+ { uint32_t x60 = (x59 & 0x3ffd03);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60);
+ { uint32_t x64 = (x59 & 0x1fffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0x1fffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x1fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x3fffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0x1fffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0x1fffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0x1fffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x3fffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0x1fffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0x1fffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0x1fffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104);
+ out[0] = x62;
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m189/femul.c b/src/Specific/solinas32_2e256m189/femul.c
index 3c9d8ac20..f9b6991df 100644
--- a/src/Specific/solinas32_2e256m189/femul.c
+++ b/src/Specific/solinas32_2e256m189/femul.c
@@ -1,86 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + (0xbd * ((uint64_t)x24 * x46)));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0xbd * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47)))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0xbd * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45)))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0xbd * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0xbd * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
-{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0xbd * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
-{ uint64_t x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) + (0xbd * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
-{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0xbd * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35)))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0xbd * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33)))))))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0xbd * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
-{ uint64_t x59 = (((uint64_t)x5 * x27) + (0xbd * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
-{ uint64_t x60 = (x59 >> 0x16);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
-{ uint64_t x62 = (x60 + x58);
-{ uint64_t x63 = (x62 >> 0x15);
-{ uint32_t x64 = ((uint32_t)x62 & 0x1fffff);
-{ uint64_t x65 = (x63 + x57);
-{ uint64_t x66 = (x65 >> 0x15);
-{ uint32_t x67 = ((uint32_t)x65 & 0x1fffff);
-{ uint64_t x68 = (x66 + x56);
-{ uint64_t x69 = (x68 >> 0x16);
-{ uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
-{ uint64_t x71 = (x69 + x55);
-{ uint64_t x72 = (x71 >> 0x15);
-{ uint32_t x73 = ((uint32_t)x71 & 0x1fffff);
-{ uint64_t x74 = (x72 + x54);
-{ uint64_t x75 = (x74 >> 0x15);
-{ uint32_t x76 = ((uint32_t)x74 & 0x1fffff);
-{ uint64_t x77 = (x75 + x53);
-{ uint64_t x78 = (x77 >> 0x16);
-{ uint32_t x79 = ((uint32_t)x77 & 0x3fffff);
-{ uint64_t x80 = (x78 + x52);
-{ uint64_t x81 = (x80 >> 0x15);
-{ uint32_t x82 = ((uint32_t)x80 & 0x1fffff);
-{ uint64_t x83 = (x81 + x51);
-{ uint64_t x84 = (x83 >> 0x15);
-{ uint32_t x85 = ((uint32_t)x83 & 0x1fffff);
-{ uint64_t x86 = (x84 + x50);
-{ uint64_t x87 = (x86 >> 0x16);
-{ uint32_t x88 = ((uint32_t)x86 & 0x3fffff);
-{ uint64_t x89 = (x87 + x49);
-{ uint64_t x90 = (x89 >> 0x15);
-{ uint32_t x91 = ((uint32_t)x89 & 0x1fffff);
-{ uint64_t x92 = (x90 + x48);
-{ uint32_t x93 = (uint32_t) (x92 >> 0x15);
-{ uint32_t x94 = ((uint32_t)x92 & 0x1fffff);
-{ uint64_t x95 = (x61 + ((uint64_t)0xbd * x93));
-{ uint32_t x96 = (uint32_t) (x95 >> 0x16);
-{ uint32_t x97 = ((uint32_t)x95 & 0x3fffff);
-{ uint32_t x98 = (x96 + x64);
-{ uint32_t x99 = (x98 >> 0x15);
-{ uint32_t x100 = (x98 & 0x1fffff);
-out[0] = x94;
-out[1] = x91;
-out[2] = x88;
-out[3] = x85;
-out[4] = x82;
-out[5] = x79;
-out[6] = x76;
-out[7] = x73;
-out[8] = x70;
-out[9] = x99 + x67;
-out[10] = x100;
-out[11] = x97;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + (0xbd * ((uint64_t)x24 * x46)));
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0xbd * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47)))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0xbd * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45)))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0xbd * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0xbd * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
+ { uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0xbd * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
+ { uint64_t x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) + (0xbd * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+ { uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0xbd * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35)))))))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0xbd * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33)))))))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0xbd * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+ { uint64_t x59 = (((uint64_t)x5 * x27) + (0xbd * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
+ { uint64_t x60 = (x59 >> 0x16);
+ { uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
+ { uint64_t x62 = (x60 + x58);
+ { uint64_t x63 = (x62 >> 0x15);
+ { uint32_t x64 = ((uint32_t)x62 & 0x1fffff);
+ { uint64_t x65 = (x63 + x57);
+ { uint64_t x66 = (x65 >> 0x15);
+ { uint32_t x67 = ((uint32_t)x65 & 0x1fffff);
+ { uint64_t x68 = (x66 + x56);
+ { uint64_t x69 = (x68 >> 0x16);
+ { uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
+ { uint64_t x71 = (x69 + x55);
+ { uint64_t x72 = (x71 >> 0x15);
+ { uint32_t x73 = ((uint32_t)x71 & 0x1fffff);
+ { uint64_t x74 = (x72 + x54);
+ { uint64_t x75 = (x74 >> 0x15);
+ { uint32_t x76 = ((uint32_t)x74 & 0x1fffff);
+ { uint64_t x77 = (x75 + x53);
+ { uint64_t x78 = (x77 >> 0x16);
+ { uint32_t x79 = ((uint32_t)x77 & 0x3fffff);
+ { uint64_t x80 = (x78 + x52);
+ { uint64_t x81 = (x80 >> 0x15);
+ { uint32_t x82 = ((uint32_t)x80 & 0x1fffff);
+ { uint64_t x83 = (x81 + x51);
+ { uint64_t x84 = (x83 >> 0x15);
+ { uint32_t x85 = ((uint32_t)x83 & 0x1fffff);
+ { uint64_t x86 = (x84 + x50);
+ { uint64_t x87 = (x86 >> 0x16);
+ { uint32_t x88 = ((uint32_t)x86 & 0x3fffff);
+ { uint64_t x89 = (x87 + x49);
+ { uint64_t x90 = (x89 >> 0x15);
+ { uint32_t x91 = ((uint32_t)x89 & 0x1fffff);
+ { uint64_t x92 = (x90 + x48);
+ { uint32_t x93 = (uint32_t) (x92 >> 0x15);
+ { uint32_t x94 = ((uint32_t)x92 & 0x1fffff);
+ { uint64_t x95 = (x61 + ((uint64_t)0xbd * x93));
+ { uint32_t x96 = (uint32_t) (x95 >> 0x16);
+ { uint32_t x97 = ((uint32_t)x95 & 0x3fffff);
+ { uint32_t x98 = (x96 + x64);
+ { uint32_t x99 = (x98 >> 0x15);
+ { uint32_t x100 = (x98 & 0x1fffff);
+ out[0] = x97;
+ out[1] = x100;
+ out[2] = (x99 + x67);
+ out[3] = x70;
+ out[4] = x73;
+ out[5] = x76;
+ out[6] = x79;
+ out[7] = x82;
+ out[8] = x85;
+ out[9] = x88;
+ out[10] = x91;
+ out[11] = x94;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e256m189/fesquare.c b/src/Specific/solinas32_2e256m189/fesquare.c
index 5538791c9..99dbf4656 100644
--- a/src/Specific/solinas32_2e256m189/fesquare.c
+++ b/src/Specific/solinas32_2e256m189/fesquare.c
@@ -1,86 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0xbd * ((uint64_t)x21 * x21)));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0xbd * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22)))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0xbd * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20)))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0xbd * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xbd * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xbd * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xbd * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xbd * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10)))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xbd * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8)))))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xbd * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
-{ uint64_t x34 = (((uint64_t)x2 * x2) + (0xbd * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
-{ uint64_t x35 = (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint64_t x37 = (x35 + x33);
-{ uint64_t x38 = (x37 >> 0x15);
-{ uint32_t x39 = ((uint32_t)x37 & 0x1fffff);
-{ uint64_t x40 = (x38 + x32);
-{ uint64_t x41 = (x40 >> 0x15);
-{ uint32_t x42 = ((uint32_t)x40 & 0x1fffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint64_t x44 = (x43 >> 0x16);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint64_t x47 = (x46 >> 0x15);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1fffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint64_t x50 = (x49 >> 0x15);
-{ uint32_t x51 = ((uint32_t)x49 & 0x1fffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x16);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
-{ uint64_t x55 = (x53 + x27);
-{ uint64_t x56 = (x55 >> 0x15);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1fffff);
-{ uint64_t x58 = (x56 + x26);
-{ uint64_t x59 = (x58 >> 0x15);
-{ uint32_t x60 = ((uint32_t)x58 & 0x1fffff);
-{ uint64_t x61 = (x59 + x25);
-{ uint64_t x62 = (x61 >> 0x16);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
-{ uint64_t x64 = (x62 + x24);
-{ uint64_t x65 = (x64 >> 0x15);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
-{ uint64_t x67 = (x65 + x23);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x15);
-{ uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
-{ uint64_t x70 = (x36 + ((uint64_t)0xbd * x68));
-{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
-{ uint32_t x73 = (x71 + x39);
-{ uint32_t x74 = (x73 >> 0x15);
-{ uint32_t x75 = (x73 & 0x1fffff);
-out[0] = x69;
-out[1] = x66;
-out[2] = x63;
-out[3] = x60;
-out[4] = x57;
-out[5] = x54;
-out[6] = x51;
-out[7] = x48;
-out[8] = x45;
-out[9] = x74 + x42;
-out[10] = x75;
-out[11] = x72;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) { /* Squares the 12-limb value in1 and writes a carried 12-limb result to out. The masks below give limb widths of 22/21/21 bits repeated four times (256 bits total); wrap-around terms are scaled by 0xbd (= 189), consistent with the 2^256 - 189 modulus named in the file path. */
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); /* x23..x34 are the 64-bit product columns for output limbs 11 down to 0; the 0x2 factors appear on products whose combined bit offset sits one bit above the column's offset */
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0xbd * ((uint64_t)x21 * x21))); /* from here on, products that would land past limb 11 are folded into the column scaled by 0xbd */
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0xbd * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22)))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0xbd * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20)))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0xbd * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xbd * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xbd * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xbd * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xbd * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10)))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xbd * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8)))))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xbd * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+ { uint64_t x34 = (((uint64_t)x2 * x2) + (0xbd * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); /* column for limb 0 */
+ { uint64_t x35 = (x34 >> 0x16); /* carry chain starts: bits above the 22-bit limb 0 move into the next column */
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff); /* low 22 bits kept as provisional limb 0 */
+ { uint64_t x37 = (x35 + x33);
+ { uint64_t x38 = (x37 >> 0x15);
+ { uint32_t x39 = ((uint32_t)x37 & 0x1fffff);
+ { uint64_t x40 = (x38 + x32);
+ { uint64_t x41 = (x40 >> 0x15);
+ { uint32_t x42 = ((uint32_t)x40 & 0x1fffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint64_t x44 = (x43 >> 0x16);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint64_t x47 = (x46 >> 0x15);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1fffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint64_t x50 = (x49 >> 0x15);
+ { uint32_t x51 = ((uint32_t)x49 & 0x1fffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x16);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+ { uint64_t x55 = (x53 + x27);
+ { uint64_t x56 = (x55 >> 0x15);
+ { uint32_t x57 = ((uint32_t)x55 & 0x1fffff);
+ { uint64_t x58 = (x56 + x26);
+ { uint64_t x59 = (x58 >> 0x15);
+ { uint32_t x60 = ((uint32_t)x58 & 0x1fffff);
+ { uint64_t x61 = (x59 + x25);
+ { uint64_t x62 = (x61 >> 0x16);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+ { uint64_t x64 = (x62 + x24);
+ { uint64_t x65 = (x64 >> 0x15);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
+ { uint64_t x67 = (x65 + x23);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x15); /* carry out of the top limb (limb 11) */
+ { uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
+ { uint64_t x70 = (x36 + ((uint64_t)0xbd * x68)); /* fold the top carry back into limb 0, scaled by 0xbd */
+ { uint32_t x71 = (uint32_t) (x70 >> 0x16); /* short second carry pass: limb 0 -> limb 1 -> limb 2 */
+ { uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+ { uint32_t x73 = (x71 + x39);
+ { uint32_t x74 = (x73 >> 0x15);
+ { uint32_t x75 = (x73 & 0x1fffff);
+ out[0] = x72; /* results are stored least-significant limb first, matching the in1 indexing */
+ out[1] = x75;
+ out[2] = (x74 + x42); /* the last carry is added without a further mask, so this limb is not re-masked to 21 bits here */
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ out[7] = x57;
+ out[8] = x60;
+ out[9] = x63;
+ out[10] = x66;
+ out[11] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e256m189/freeze.c b/src/Specific/solinas32_2e256m189/freeze.c
index f0233bbaa..72d762bc8 100644
--- a/src/Specific/solinas32_2e256m189/freeze.c
+++ b/src/Specific/solinas32_2e256m189/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fff43;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) { /* NOTE(review): this body is pretty-printer output, not compilable C: the paired declarations "uint32_t a, uint8_t b = Op (Syntax....) (...)" are not valid C. Read as pseudo-code it subtracts a fixed 12-limb constant from in1 through a borrow chain, then adds the constant back under a mask derived from the final borrow. */
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fff43); /* limb 0 step; arguments are presumably (borrow-in, minuend, subtrahend) and the leading 22/21 the limb width - TODO confirm. 0x3fff43 = 2^22 - 189 */
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x1fffff); /* each later step takes the previous step's second result (x25, x28, ...) as its first argument */
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x1fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x3fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x1fffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0x1fffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0x3fffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0x1fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x1fffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0x3fffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x1fffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x1fffff);
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff); /* mask built from the final borrow x58; presumably all-ones when x58 is nonzero and 0 otherwise - cmovznz is defined elsewhere, TODO confirm argument order */
+ { uint32_t x60 = (x59 & 0x3fff43); /* the mask is ANDed with the same per-limb constants that were subtracted above */
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60); /* add the masked constant back onto the subtraction result, limb by limb with carry */
+ { uint32_t x64 = (x59 & 0x1fffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0x1fffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x3fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x1fffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0x1fffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0x3fffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0x1fffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x1fffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0x3fffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0x1fffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0x1fffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104); /* the carry out of the last limb is bound to "_" and never used */
+ out[0] = x62; /* outputs are the add-back results, least-significant limb first */
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c b/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c
index 831304167..e0a528d67 100644
--- a/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c
+++ b/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c
@@ -1,25 +1,60 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffff;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) { /* NOTE(review): this body is pretty-printer output, not compilable C ("Op (Syntax....)" expressions, paired declarations, and borrows typed as the non-C type "ℤ"). Read as pseudo-code it subtracts a fixed 12-limb constant from in1 through a borrow chain, then adds the constant back under a mask derived from the final borrow. */
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffff); /* limb 0 step; arguments are presumably (borrow-in, minuend, subtrahend) - TODO confirm. The subtracted constants (0x3fffff, 0x1fffff, 0x1fffff, 0x3fffff, 0x3ff, 0, 0, 0, 0, 0x1, 0x1ffc00, 0x1fffff) in 22/21/21-bit limbs are consistent with 2^256 - 2^224 + 2^192 + 2^96 - 1, the prime named in the file path */
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x1fffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x1fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x3fffff);
+ { uint32_t x36, ℤ x37 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) Syntax.TZ) (Return x34, Return x10, 0x3ff); /* NOTE(review): from here through x52 the borrow is typed "ℤ" / Syntax.TZ rather than a fixed-width word - presumably the generator could not bound it; confirm against the generator */
+ { uint32_t x39, ℤ x40 = Op (Syntax.SubWithGetBorrow 21 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x37, Return x12, 0x0);
+ { uint32_t x42, ℤ x43 = Op (Syntax.SubWithGetBorrow 22 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x40, Return x14, 0x0);
+ { uint32_t x45, ℤ x46 = Op (Syntax.SubWithGetBorrow 21 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x43, Return x16, 0x0);
+ { uint32_t x48, ℤ x49 = Op (Syntax.SubWithGetBorrow 21 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x46, Return x18, 0x0);
+ { uint32_t x51, ℤ x52 = Op (Syntax.SubWithGetBorrow 22 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x49, Return x20, 0x1);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 21 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x1ffc00);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x1fffff);
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff); /* mask built from the final borrow x58; presumably all-ones when x58 is nonzero and 0 otherwise - cmovznz is defined elsewhere, TODO confirm argument order */
+ { uint32_t x60 = (x59 & 0x3fffff); /* the mask is ANDed with the same per-limb constants that were subtracted above */
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60); /* add the masked constant back onto the subtraction result, limb by limb with carry */
+ { uint32_t x64 = (x59 & 0x1fffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0x1fffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x3fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x3ff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x81, uint8_t x82 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, 0x0); /* limbs 5..8 of the constant are zero, so a literal 0x0 is added and only the carry propagates */
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x42, 0x0);
+ { uint32_t x87, uint8_t x88 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x45, 0x0);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x88, Return x48, 0x0);
+ { uint8_t x92 = ((uint8_t)x59 & 0x1); /* masked limb 9 of the constant (value 0x1) */
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x51, Return x92);
+ { uint32_t x96 = (x59 & 0x1ffc00);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x54, Return x96);
+ { uint32_t x100 = (x59 & 0x1fffff);
+ { uint32_t x102, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x57, Return x100); /* the carry out of the last limb is bound to "_" and never used */
+ out[0] = x62; /* outputs are the add-back results, least-significant limb first */
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x81;
+ out[6] = x84;
+ out[7] = x87;
+ out[8] = x90;
+ out[9] = x94;
+ out[10] = x98;
+ out[11] = x102;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m2e32m977/femul.c b/src/Specific/solinas32_2e256m2e32m977/femul.c
index e4222a9ab..69b723883 100644
--- a/src/Specific/solinas32_2e256m2e32m977/femul.c
+++ b/src/Specific/solinas32_2e256m2e32m977/femul.c
@@ -1,97 +1,103 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = ((((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))) + (0x800 * ((uint64_t)x24 * x46)));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + ((0x3d1 * ((uint64_t)x24 * x46)) + (0x400 * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47))))));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47)))) + (0x800 * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45))))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + ((0x3d1 * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45)))) + (0x800 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43)))))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + ((0x3d1 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))) + (0x400 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41)))))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))) + (0x800 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39)))))))));
-{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + ((0x3d1 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))) + (0x800 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37))))))))));
-{ uint64_t x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) + ((0x3d1 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))) + (0x400 * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35))))))))))));
-{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + ((0x3d1 * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35)))))))))) + (0x800 * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33))))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + ((0x3d1 * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33)))))))))) + (0x800 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31)))))))))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + ((0x3d1 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))) + (0x400 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))));
-{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x3d1 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
-{ uint64_t x60 = (x59 >> 0x16);
-{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
-{ uint64_t x62 = (x48 >> 0x15);
-{ uint32_t x63 = ((uint32_t)x48 & 0x1fffff);
-{ uint64_t x64 = ((0x200000 * x62) + x63);
-{ uint64_t x65 = (x64 >> 0x15);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
-{ uint64_t x67 = ((x60 + x58) + (0x400 * x65));
-{ uint64_t x68 = (x67 >> 0x15);
-{ uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
-{ uint64_t x70 = (x61 + (0x3d1 * x65));
-{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
-{ uint64_t x73 = (x68 + x57);
-{ uint64_t x74 = (x73 >> 0x15);
-{ uint32_t x75 = ((uint32_t)x73 & 0x1fffff);
-{ uint64_t x76 = (x74 + x56);
-{ uint64_t x77 = (x76 >> 0x16);
-{ uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
-{ uint64_t x79 = (x77 + x55);
-{ uint64_t x80 = (x79 >> 0x15);
-{ uint32_t x81 = ((uint32_t)x79 & 0x1fffff);
-{ uint64_t x82 = (x80 + x54);
-{ uint64_t x83 = (x82 >> 0x15);
-{ uint32_t x84 = ((uint32_t)x82 & 0x1fffff);
-{ uint64_t x85 = (x83 + x53);
-{ uint64_t x86 = (x85 >> 0x16);
-{ uint32_t x87 = ((uint32_t)x85 & 0x3fffff);
-{ uint64_t x88 = (x86 + x52);
-{ uint64_t x89 = (x88 >> 0x15);
-{ uint32_t x90 = ((uint32_t)x88 & 0x1fffff);
-{ uint64_t x91 = (x89 + x51);
-{ uint64_t x92 = (x91 >> 0x15);
-{ uint32_t x93 = ((uint32_t)x91 & 0x1fffff);
-{ uint64_t x94 = (x92 + x50);
-{ uint64_t x95 = (x94 >> 0x16);
-{ uint32_t x96 = ((uint32_t)x94 & 0x3fffff);
-{ uint64_t x97 = (x95 + x49);
-{ uint64_t x98 = (x97 >> 0x15);
-{ uint32_t x99 = ((uint32_t)x97 & 0x1fffff);
-{ uint64_t x100 = (x98 + x66);
-{ uint32_t x101 = (uint32_t) (x100 >> 0x15);
-{ uint32_t x102 = ((uint32_t)x100 & 0x1fffff);
-{ uint64_t x103 = (((uint64_t)0x200000 * x101) + x102);
-{ uint32_t x104 = (uint32_t) (x103 >> 0x15);
-{ uint32_t x105 = ((uint32_t)x103 & 0x1fffff);
-{ uint32_t x106 = ((x71 + x69) + (0x400 * x104));
-{ uint32_t x107 = (x106 >> 0x15);
-{ uint32_t x108 = (x106 & 0x1fffff);
-{ uint32_t x109 = (x72 + (0x3d1 * x104));
-{ uint32_t x110 = (x109 >> 0x16);
-{ uint32_t x111 = (x109 & 0x3fffff);
-out[0] = x105;
-out[1] = x99;
-out[2] = x96;
-out[3] = x93;
-out[4] = x90;
-out[5] = x87;
-out[6] = x84;
-out[7] = x81;
-out[8] = x78;
-out[9] = x107 + x75;
-out[10] = x110 + x108;
-out[11] = x111;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = ((((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))) + (0x800 * ((uint64_t)x24 * x46)));
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + ((0x3d1 * ((uint64_t)x24 * x46)) + (0x400 * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47))))));
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47)))) + (0x800 * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45))))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + ((0x3d1 * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45)))) + (0x800 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43)))))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + ((0x3d1 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))) + (0x400 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41)))))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))) + (0x800 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39)))))))));
+ { uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + ((0x3d1 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))) + (0x800 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37))))))))));
+ { uint64_t x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) + ((0x3d1 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))) + (0x400 * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35))))))))))));
+ { uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + ((0x3d1 * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35)))))))))) + (0x800 * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33))))))))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + ((0x3d1 * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33)))))))))) + (0x800 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31)))))))))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + ((0x3d1 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))) + (0x400 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))));
+ { uint64_t x59 = (((uint64_t)x5 * x27) + (0x3d1 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
+ { uint64_t x60 = (x59 >> 0x16);
+ { uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
+ { uint64_t x62 = (x48 >> 0x15);
+ { uint32_t x63 = ((uint32_t)x48 & 0x1fffff);
+ { uint64_t x64 = ((0x200000 * x62) + x63);
+ { uint64_t x65 = (x64 >> 0x15);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
+ { uint64_t x67 = ((x60 + x58) + (0x400 * x65));
+ { uint64_t x68 = (x67 >> 0x15);
+ { uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
+ { uint64_t x70 = (x61 + (0x3d1 * x65));
+ { uint32_t x71 = (uint32_t) (x70 >> 0x16);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+ { uint64_t x73 = (x68 + x57);
+ { uint64_t x74 = (x73 >> 0x15);
+ { uint32_t x75 = ((uint32_t)x73 & 0x1fffff);
+ { uint64_t x76 = (x74 + x56);
+ { uint64_t x77 = (x76 >> 0x16);
+ { uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
+ { uint64_t x79 = (x77 + x55);
+ { uint64_t x80 = (x79 >> 0x15);
+ { uint32_t x81 = ((uint32_t)x79 & 0x1fffff);
+ { uint64_t x82 = (x80 + x54);
+ { uint64_t x83 = (x82 >> 0x15);
+ { uint32_t x84 = ((uint32_t)x82 & 0x1fffff);
+ { uint64_t x85 = (x83 + x53);
+ { uint64_t x86 = (x85 >> 0x16);
+ { uint32_t x87 = ((uint32_t)x85 & 0x3fffff);
+ { uint64_t x88 = (x86 + x52);
+ { uint64_t x89 = (x88 >> 0x15);
+ { uint32_t x90 = ((uint32_t)x88 & 0x1fffff);
+ { uint64_t x91 = (x89 + x51);
+ { uint64_t x92 = (x91 >> 0x15);
+ { uint32_t x93 = ((uint32_t)x91 & 0x1fffff);
+ { uint64_t x94 = (x92 + x50);
+ { uint64_t x95 = (x94 >> 0x16);
+ { uint32_t x96 = ((uint32_t)x94 & 0x3fffff);
+ { uint64_t x97 = (x95 + x49);
+ { uint64_t x98 = (x97 >> 0x15);
+ { uint32_t x99 = ((uint32_t)x97 & 0x1fffff);
+ { uint64_t x100 = (x98 + x66);
+ { uint32_t x101 = (uint32_t) (x100 >> 0x15);
+ { uint32_t x102 = ((uint32_t)x100 & 0x1fffff);
+ { uint64_t x103 = (((uint64_t)0x200000 * x101) + x102);
+ { uint32_t x104 = (uint32_t) (x103 >> 0x15);
+ { uint32_t x105 = ((uint32_t)x103 & 0x1fffff);
+ { uint32_t x106 = ((x71 + x69) + (0x400 * x104));
+ { uint32_t x107 = (x106 >> 0x15);
+ { uint32_t x108 = (x106 & 0x1fffff);
+ { uint32_t x109 = (x72 + (0x3d1 * x104));
+ { uint32_t x110 = (x109 >> 0x16);
+ { uint32_t x111 = (x109 & 0x3fffff);
+ out[0] = x111;
+ out[1] = (x110 + x108);
+ out[2] = (x107 + x75);
+ out[3] = x78;
+ out[4] = x81;
+ out[5] = x84;
+ out[6] = x87;
+ out[7] = x90;
+ out[8] = x93;
+ out[9] = x96;
+ out[10] = x99;
+ out[11] = x105;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e256m2e32m977/fesquare.c b/src/Specific/solinas32_2e256m2e32m977/fesquare.c
index bc8569a5f..7f795b431 100644
--- a/src/Specific/solinas32_2e256m2e32m977/fesquare.c
+++ b/src/Specific/solinas32_2e256m2e32m977/fesquare.c
@@ -1,97 +1,91 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = ((((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))) + (0x800 * ((uint64_t)x21 * x21)));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + ((0x3d1 * ((uint64_t)x21 * x21)) + (0x400 * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22))))));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22)))) + (0x800 * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + ((0x3d1 * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20)))) + (0x800 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18)))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + ((0x3d1 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))) + (0x400 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16)))))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))) + (0x800 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14)))))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + ((0x3d1 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))) + (0x800 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12))))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + ((0x3d1 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))) + (0x400 * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10))))))))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + ((0x3d1 * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10)))))))))) + (0x800 * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8))))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + ((0x3d1 * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8)))))))))) + (0x800 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6)))))))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + ((0x3d1 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))) + (0x400 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))));
-{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x3d1 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
-{ uint64_t x35 = (x34 >> 0x16);
-{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
-{ uint64_t x37 = (x23 >> 0x15);
-{ uint32_t x38 = ((uint32_t)x23 & 0x1fffff);
-{ uint64_t x39 = ((0x200000 * x37) + x38);
-{ uint64_t x40 = (x39 >> 0x15);
-{ uint32_t x41 = ((uint32_t)x39 & 0x1fffff);
-{ uint64_t x42 = ((x35 + x33) + (0x400 * x40));
-{ uint64_t x43 = (x42 >> 0x15);
-{ uint32_t x44 = ((uint32_t)x42 & 0x1fffff);
-{ uint64_t x45 = (x36 + (0x3d1 * x40));
-{ uint32_t x46 = (uint32_t) (x45 >> 0x16);
-{ uint32_t x47 = ((uint32_t)x45 & 0x3fffff);
-{ uint64_t x48 = (x43 + x32);
-{ uint64_t x49 = (x48 >> 0x15);
-{ uint32_t x50 = ((uint32_t)x48 & 0x1fffff);
-{ uint64_t x51 = (x49 + x31);
-{ uint64_t x52 = (x51 >> 0x16);
-{ uint32_t x53 = ((uint32_t)x51 & 0x3fffff);
-{ uint64_t x54 = (x52 + x30);
-{ uint64_t x55 = (x54 >> 0x15);
-{ uint32_t x56 = ((uint32_t)x54 & 0x1fffff);
-{ uint64_t x57 = (x55 + x29);
-{ uint64_t x58 = (x57 >> 0x15);
-{ uint32_t x59 = ((uint32_t)x57 & 0x1fffff);
-{ uint64_t x60 = (x58 + x28);
-{ uint64_t x61 = (x60 >> 0x16);
-{ uint32_t x62 = ((uint32_t)x60 & 0x3fffff);
-{ uint64_t x63 = (x61 + x27);
-{ uint64_t x64 = (x63 >> 0x15);
-{ uint32_t x65 = ((uint32_t)x63 & 0x1fffff);
-{ uint64_t x66 = (x64 + x26);
-{ uint64_t x67 = (x66 >> 0x15);
-{ uint32_t x68 = ((uint32_t)x66 & 0x1fffff);
-{ uint64_t x69 = (x67 + x25);
-{ uint64_t x70 = (x69 >> 0x16);
-{ uint32_t x71 = ((uint32_t)x69 & 0x3fffff);
-{ uint64_t x72 = (x70 + x24);
-{ uint64_t x73 = (x72 >> 0x15);
-{ uint32_t x74 = ((uint32_t)x72 & 0x1fffff);
-{ uint64_t x75 = (x73 + x41);
-{ uint32_t x76 = (uint32_t) (x75 >> 0x15);
-{ uint32_t x77 = ((uint32_t)x75 & 0x1fffff);
-{ uint64_t x78 = (((uint64_t)0x200000 * x76) + x77);
-{ uint32_t x79 = (uint32_t) (x78 >> 0x15);
-{ uint32_t x80 = ((uint32_t)x78 & 0x1fffff);
-{ uint32_t x81 = ((x46 + x44) + (0x400 * x79));
-{ uint32_t x82 = (x81 >> 0x15);
-{ uint32_t x83 = (x81 & 0x1fffff);
-{ uint32_t x84 = (x47 + (0x3d1 * x79));
-{ uint32_t x85 = (x84 >> 0x16);
-{ uint32_t x86 = (x84 & 0x3fffff);
-out[0] = x80;
-out[1] = x74;
-out[2] = x71;
-out[3] = x68;
-out[4] = x65;
-out[5] = x62;
-out[6] = x59;
-out[7] = x56;
-out[8] = x53;
-out[9] = x82 + x50;
-out[10] = x85 + x83;
-out[11] = x86;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = ((((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))) + (0x800 * ((uint64_t)x21 * x21)));
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + ((0x3d1 * ((uint64_t)x21 * x21)) + (0x400 * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22))))));
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22)))) + (0x800 * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20))))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + ((0x3d1 * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20)))) + (0x800 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18)))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + ((0x3d1 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))) + (0x400 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16)))))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))) + (0x800 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14)))))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + ((0x3d1 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))) + (0x800 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12))))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + ((0x3d1 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))) + (0x400 * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10))))))))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + ((0x3d1 * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10)))))))))) + (0x800 * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8))))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + ((0x3d1 * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8)))))))))) + (0x800 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6)))))))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + ((0x3d1 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))) + (0x400 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))));
+ { uint64_t x34 = (((uint64_t)x2 * x2) + (0x3d1 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
+ { uint64_t x35 = (x34 >> 0x16);
+ { uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+ { uint64_t x37 = (x23 >> 0x15);
+ { uint32_t x38 = ((uint32_t)x23 & 0x1fffff);
+ { uint64_t x39 = ((0x200000 * x37) + x38);
+ { uint64_t x40 = (x39 >> 0x15);
+ { uint32_t x41 = ((uint32_t)x39 & 0x1fffff);
+ { uint64_t x42 = ((x35 + x33) + (0x400 * x40));
+ { uint64_t x43 = (x42 >> 0x15);
+ { uint32_t x44 = ((uint32_t)x42 & 0x1fffff);
+ { uint64_t x45 = (x36 + (0x3d1 * x40));
+ { uint32_t x46 = (uint32_t) (x45 >> 0x16);
+ { uint32_t x47 = ((uint32_t)x45 & 0x3fffff);
+ { uint64_t x48 = (x43 + x32);
+ { uint64_t x49 = (x48 >> 0x15);
+ { uint32_t x50 = ((uint32_t)x48 & 0x1fffff);
+ { uint64_t x51 = (x49 + x31);
+ { uint64_t x52 = (x51 >> 0x16);
+ { uint32_t x53 = ((uint32_t)x51 & 0x3fffff);
+ { uint64_t x54 = (x52 + x30);
+ { uint64_t x55 = (x54 >> 0x15);
+ { uint32_t x56 = ((uint32_t)x54 & 0x1fffff);
+ { uint64_t x57 = (x55 + x29);
+ { uint64_t x58 = (x57 >> 0x15);
+ { uint32_t x59 = ((uint32_t)x57 & 0x1fffff);
+ { uint64_t x60 = (x58 + x28);
+ { uint64_t x61 = (x60 >> 0x16);
+ { uint32_t x62 = ((uint32_t)x60 & 0x3fffff);
+ { uint64_t x63 = (x61 + x27);
+ { uint64_t x64 = (x63 >> 0x15);
+ { uint32_t x65 = ((uint32_t)x63 & 0x1fffff);
+ { uint64_t x66 = (x64 + x26);
+ { uint64_t x67 = (x66 >> 0x15);
+ { uint32_t x68 = ((uint32_t)x66 & 0x1fffff);
+ { uint64_t x69 = (x67 + x25);
+ { uint64_t x70 = (x69 >> 0x16);
+ { uint32_t x71 = ((uint32_t)x69 & 0x3fffff);
+ { uint64_t x72 = (x70 + x24);
+ { uint64_t x73 = (x72 >> 0x15);
+ { uint32_t x74 = ((uint32_t)x72 & 0x1fffff);
+ { uint64_t x75 = (x73 + x41);
+ { uint32_t x76 = (uint32_t) (x75 >> 0x15);
+ { uint32_t x77 = ((uint32_t)x75 & 0x1fffff);
+ { uint64_t x78 = (((uint64_t)0x200000 * x76) + x77);
+ { uint32_t x79 = (uint32_t) (x78 >> 0x15);
+ { uint32_t x80 = ((uint32_t)x78 & 0x1fffff);
+ { uint32_t x81 = ((x46 + x44) + (0x400 * x79));
+ { uint32_t x82 = (x81 >> 0x15);
+ { uint32_t x83 = (x81 & 0x1fffff);
+ { uint32_t x84 = (x47 + (0x3d1 * x79));
+ { uint32_t x85 = (x84 >> 0x16);
+ { uint32_t x86 = (x84 & 0x3fffff);
+ out[0] = x86;
+ out[1] = (x85 + x83);
+ out[2] = (x82 + x50);
+ out[3] = x53;
+ out[4] = x56;
+ out[5] = x59;
+ out[6] = x62;
+ out[7] = x65;
+ out[8] = x68;
+ out[9] = x71;
+ out[10] = x74;
+ out[11] = x80;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e256m2e32m977/freeze.c b/src/Specific/solinas32_2e256m2e32m977/freeze.c
index 816df7d43..38b518d28 100644
--- a/src/Specific/solinas32_2e256m2e32m977/freeze.c
+++ b/src/Specific/solinas32_2e256m2e32m977/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffc2f;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) { /* Reads the 12 limbs in1[0..11] and writes 12 limbs to out[0..11]: a borrow-chained subtraction of fixed per-limb constants, followed by a carry-chained addition of the same constants masked by x59. */
+ { const uint32_t x21 = in1[11]; /* limbs are loaded from the highest index down; note that x21 holds in1[11] while x22 holds in1[10] */
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffc2f); /* NOTE(review): the two-variable declaration and the Op (Syntax...) form are not C, so this line cannot compile as printed; it looks like an unrendered generator node. Presumably a 22-bit subtract-with-borrow of (borrow-in 0x0, x2, 0x3ffc2f) giving difference x24 and borrow-out x25; confirm against the generator. */
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x1ffbff); /* previous borrow x25 is threaded in; the constants 0x3ffc2f, 0x1ffbff, 0x1fffff, 0x3fffff, ... equal 2^256 - 2^32 - 977 (the value named in this file's path) split into limbs of 22, 21, 21 bits repeating */
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x1fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x3fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x1fffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0x1fffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0x3fffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0x1fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x1fffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0x3fffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x1fffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x1fffff); /* x58 is the borrow out of the top limb */
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff); /* cmovznz is not defined in the visible code; presumably it yields 0x0 when x58 is zero and 0xffffffff otherwise -- TODO confirm */
+ { uint32_t x60 = (x59 & 0x3ffc2f); /* x59 is ANDed with the same constant that was subtracted from limb 0, so the addend is either that constant or zero if x59 is all-ones or zero */
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60); /* NOTE(review): same non-C Op form as above; presumably a 22-bit add-with-carry of (carry-in 0x0, x24, x60) giving sum x62 and carry-out x63 -- confirm */
+ { uint32_t x64 = (x59 & 0x1ffbff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64); /* previous carry x63 is threaded in, mirroring the borrow chain */
+ { uint32_t x68 = (x59 & 0x1fffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x3fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x1fffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0x1fffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0x3fffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0x1fffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x1fffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0x3fffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0x1fffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0x1fffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104); /* the carry out of the top limb is bound to _ and never read */
+ out[0] = x62; /* the sums of the add chain are stored lowest limb first */
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m88x2e240m1/freeze.c b/src/Specific/solinas32_2e256m88x2e240m1/freeze.c
index 831304167..704a289a8 100644
--- a/src/Specific/solinas32_2e256m88x2e240m1/freeze.c
+++ b/src/Specific/solinas32_2e256m88x2e240m1/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffff;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) { /* Reads the 12 limbs in1[0..11] and writes 12 limbs to out[0..11]: a borrow-chained subtraction of fixed per-limb constants, followed by a carry-chained addition of the same constants masked by x59. */
+ { const uint32_t x21 = in1[11]; /* limbs are loaded from the highest index down; note that x21 holds in1[11] while x22 holds in1[10] */
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffff); /* NOTE(review): the two-variable declaration and the Op (Syntax...) form are not C, so this line cannot compile as printed; it looks like an unrendered generator node. Presumably a 22-bit subtract-with-borrow of (borrow-in 0x0, x2, 0x3fffff) giving difference x24 and borrow-out x25; confirm against the generator. */
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x1fffff); /* previous borrow x25 is threaded in; the constants (all-ones limbs of 22, 21, 21 bits repeating, with 0x1ff4ff on top) equal 2^256 - 88*2^240 - 1, the value named in this file's path */
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x1fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x3fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x1fffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0x1fffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0x3fffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0x1fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x1fffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0x3fffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x1fffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x1ff4ff); /* top limb uses 0x1ff4ff; x58 is the borrow out of the top limb */
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff); /* cmovznz is not defined in the visible code; presumably it yields 0x0 when x58 is zero and 0xffffffff otherwise -- TODO confirm */
+ { uint32_t x60 = (x59 & 0x3fffff); /* x59 is ANDed with the same constant that was subtracted from limb 0, so the addend is either that constant or zero if x59 is all-ones or zero */
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60); /* NOTE(review): same non-C Op form as above; presumably a 22-bit add-with-carry of (carry-in 0x0, x24, x60) giving sum x62 and carry-out x63 -- confirm */
+ { uint32_t x64 = (x59 & 0x1fffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64); /* previous carry x63 is threaded in, mirroring the borrow chain */
+ { uint32_t x68 = (x59 & 0x1fffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x3fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x1fffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0x1fffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0x3fffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0x1fffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x1fffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0x3fffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0x1fffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0x1ff4ff); /* top-limb addend uses the same 0x1ff4ff constant as the top-limb subtraction */
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104); /* the carry out of the top limb is bound to _ and never read */
+ out[0] = x62; /* the sums of the add chain are stored lowest limb first */
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e266m3/femul.c b/src/Specific/solinas32_2e266m3/femul.c
index 00a030310..6f615fcd9 100644
--- a/src/Specific/solinas32_2e266m3/femul.c
+++ b/src/Specific/solinas32_2e266m3/femul.c
@@ -1,86 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + ((0x2 * ((uint64_t)x21 * x33)) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x3 * ((uint64_t)x24 * x46)));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0x3 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x3 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x3 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x3 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
-{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0x3 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
-{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x3 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
-{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x3 * (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x3 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x3 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
-{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x3 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
-{ uint32_t x60 = (uint32_t) (x59 >> 0x17);
-{ uint32_t x61 = ((uint32_t)x59 & 0x7fffff);
-{ uint64_t x62 = (x60 + x58);
-{ uint32_t x63 = (uint32_t) (x62 >> 0x16);
-{ uint32_t x64 = ((uint32_t)x62 & 0x3fffff);
-{ uint64_t x65 = (x63 + x57);
-{ uint32_t x66 = (uint32_t) (x65 >> 0x16);
-{ uint32_t x67 = ((uint32_t)x65 & 0x3fffff);
-{ uint64_t x68 = (x66 + x56);
-{ uint32_t x69 = (uint32_t) (x68 >> 0x16);
-{ uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
-{ uint64_t x71 = (x69 + x55);
-{ uint32_t x72 = (uint32_t) (x71 >> 0x16);
-{ uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
-{ uint64_t x74 = (x72 + x54);
-{ uint32_t x75 = (uint32_t) (x74 >> 0x16);
-{ uint32_t x76 = ((uint32_t)x74 & 0x3fffff);
-{ uint64_t x77 = (x75 + x53);
-{ uint32_t x78 = (uint32_t) (x77 >> 0x17);
-{ uint32_t x79 = ((uint32_t)x77 & 0x7fffff);
-{ uint64_t x80 = (x78 + x52);
-{ uint32_t x81 = (uint32_t) (x80 >> 0x16);
-{ uint32_t x82 = ((uint32_t)x80 & 0x3fffff);
-{ uint64_t x83 = (x81 + x51);
-{ uint32_t x84 = (uint32_t) (x83 >> 0x16);
-{ uint32_t x85 = ((uint32_t)x83 & 0x3fffff);
-{ uint64_t x86 = (x84 + x50);
-{ uint32_t x87 = (uint32_t) (x86 >> 0x16);
-{ uint32_t x88 = ((uint32_t)x86 & 0x3fffff);
-{ uint64_t x89 = (x87 + x49);
-{ uint32_t x90 = (uint32_t) (x89 >> 0x16);
-{ uint32_t x91 = ((uint32_t)x89 & 0x3fffff);
-{ uint64_t x92 = (x90 + x48);
-{ uint32_t x93 = (uint32_t) (x92 >> 0x16);
-{ uint32_t x94 = ((uint32_t)x92 & 0x3fffff);
-{ uint32_t x95 = (x61 + (0x3 * x93));
-{ uint32_t x96 = (x95 >> 0x17);
-{ uint32_t x97 = (x95 & 0x7fffff);
-{ uint32_t x98 = (x96 + x64);
-{ uint32_t x99 = (x98 >> 0x16);
-{ uint32_t x100 = (x98 & 0x3fffff);
-out[0] = x94;
-out[1] = x91;
-out[2] = x88;
-out[3] = x85;
-out[4] = x82;
-out[5] = x79;
-out[6] = x76;
-out[7] = x73;
-out[8] = x70;
-out[9] = x99 + x67;
-out[10] = x100;
-out[11] = x97;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) { /* Multiplies the 12-limb inputs in1 and in2 and writes a 12-limb result to out: twelve 64-bit column sums are formed, then carried limb by limb with 23-bit splits at limbs 0 and 6 and 22-bit splits elsewhere (266 bits in total). */
+ { const uint32_t x24 = in1[11]; /* limbs are loaded from the highest index down; note that x24 holds in1[11] while x25 holds in1[10] */
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11]; /* same ordering quirk for the second operand: x46 holds in2[11], x47 holds in2[10] */
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + ((0x2 * ((uint64_t)x21 * x33)) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); /* x48..x59 are the column sums, top column first (x48 ends up in out[11], x59 in out[0]); one operand of each product is cast to uint64_t so the multiply is done in 64 bits; the 0x2 factors presumably compensate for the mixed 23/22-bit limb widths -- confirm */
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x3 * ((uint64_t)x24 * x46))); /* products whose limb indices sum to 12 or more are folded back scaled by 0x3, consistent with 2^266 being congruent to 3 modulo the 2^266 - 3 named in this file's path */
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0x3 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x3 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x3 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x3 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
+ { uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0x3 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
+ { uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x3 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+ { uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x3 * (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x3 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x3 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+ { uint64_t x59 = (((uint64_t)x5 * x27) + (0x3 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
+ { uint32_t x60 = (uint32_t) (x59 >> 0x17); /* carry chain starts at the lowest column: the part of x59 above bit 23 is the carry; the narrowing cast assumes it fits in 32 bits, which depends on input limb bounds not visible here -- TODO confirm */
+ { uint32_t x61 = ((uint32_t)x59 & 0x7fffff); /* low 23 bits of the lowest column */
+ { uint64_t x62 = (x60 + x58); /* each carry is added into the next column before that column is split */
+ { uint32_t x63 = (uint32_t) (x62 >> 0x16);
+ { uint32_t x64 = ((uint32_t)x62 & 0x3fffff); /* low 22 bits of the second column */
+ { uint64_t x65 = (x63 + x57);
+ { uint32_t x66 = (uint32_t) (x65 >> 0x16);
+ { uint32_t x67 = ((uint32_t)x65 & 0x3fffff);
+ { uint64_t x68 = (x66 + x56);
+ { uint32_t x69 = (uint32_t) (x68 >> 0x16);
+ { uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
+ { uint64_t x71 = (x69 + x55);
+ { uint32_t x72 = (uint32_t) (x71 >> 0x16);
+ { uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
+ { uint64_t x74 = (x72 + x54);
+ { uint32_t x75 = (uint32_t) (x74 >> 0x16);
+ { uint32_t x76 = ((uint32_t)x74 & 0x3fffff);
+ { uint64_t x77 = (x75 + x53);
+ { uint32_t x78 = (uint32_t) (x77 >> 0x17); /* this column (stored to out[6]) is split at 23 bits, like the lowest one */
+ { uint32_t x79 = ((uint32_t)x77 & 0x7fffff);
+ { uint64_t x80 = (x78 + x52);
+ { uint32_t x81 = (uint32_t) (x80 >> 0x16);
+ { uint32_t x82 = ((uint32_t)x80 & 0x3fffff);
+ { uint64_t x83 = (x81 + x51);
+ { uint32_t x84 = (uint32_t) (x83 >> 0x16);
+ { uint32_t x85 = ((uint32_t)x83 & 0x3fffff);
+ { uint64_t x86 = (x84 + x50);
+ { uint32_t x87 = (uint32_t) (x86 >> 0x16);
+ { uint32_t x88 = ((uint32_t)x86 & 0x3fffff);
+ { uint64_t x89 = (x87 + x49);
+ { uint32_t x90 = (uint32_t) (x89 >> 0x16);
+ { uint32_t x91 = ((uint32_t)x89 & 0x3fffff);
+ { uint64_t x92 = (x90 + x48);
+ { uint32_t x93 = (uint32_t) (x92 >> 0x16); /* carry out of the top column */
+ { uint32_t x94 = ((uint32_t)x92 & 0x3fffff);
+ { uint32_t x95 = (x61 + (0x3 * x93)); /* the top carry re-enters the lowest limb scaled by 0x3; done in 32-bit arithmetic, so it assumes the sum does not exceed 32 bits -- TODO confirm bounds */
+ { uint32_t x96 = (x95 >> 0x17);
+ { uint32_t x97 = (x95 & 0x7fffff);
+ { uint32_t x98 = (x96 + x64); /* one further carry step into the second limb */
+ { uint32_t x99 = (x98 >> 0x16);
+ { uint32_t x100 = (x98 & 0x3fffff);
+ out[0] = x97; /* results are stored lowest limb first */
+ out[1] = x100;
+ out[2] = (x99 + x67); /* the last carry is added to limb 2 without a further mask, so this limb is not re-reduced to 22 bits here */
+ out[3] = x70;
+ out[4] = x73;
+ out[5] = x76;
+ out[6] = x79;
+ out[7] = x82;
+ out[8] = x85;
+ out[9] = x88;
+ out[10] = x91;
+ out[11] = x94;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e266m3/fesquare.c b/src/Specific/solinas32_2e266m3/fesquare.c
index 825e82c23..b162c3b09 100644
--- a/src/Specific/solinas32_2e266m3/fesquare.c
+++ b/src/Specific/solinas32_2e266m3/fesquare.c
@@ -1,86 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x3 * ((uint64_t)x21 * x21)));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x3 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + ((0x2 * ((uint64_t)x20 * x20)) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + ((0x2 * ((uint64_t)x18 * x20)) + ((0x2 * ((uint64_t)x20 * x18)) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
-{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x3 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
-{ uint32_t x35 = (uint32_t) (x34 >> 0x17);
-{ uint32_t x36 = ((uint32_t)x34 & 0x7fffff);
-{ uint64_t x37 = (x35 + x33);
-{ uint32_t x38 = (uint32_t) (x37 >> 0x16);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
-{ uint64_t x40 = (x38 + x32);
-{ uint32_t x41 = (uint32_t) (x40 >> 0x16);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint32_t x44 = (uint32_t) (x43 >> 0x16);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x16);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x16);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x17);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
-{ uint64_t x55 = (x53 + x27);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x16);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
-{ uint64_t x58 = (x56 + x26);
-{ uint32_t x59 = (uint32_t) (x58 >> 0x16);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
-{ uint64_t x61 = (x59 + x25);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x16);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
-{ uint64_t x64 = (x62 + x24);
-{ uint32_t x65 = (uint32_t) (x64 >> 0x16);
-{ uint32_t x66 = ((uint32_t)x64 & 0x3fffff);
-{ uint64_t x67 = (x65 + x23);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x16);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
-{ uint32_t x70 = (x36 + (0x3 * x68));
-{ uint32_t x71 = (x70 >> 0x17);
-{ uint32_t x72 = (x70 & 0x7fffff);
-{ uint32_t x73 = (x71 + x39);
-{ uint32_t x74 = (x73 >> 0x16);
-{ uint32_t x75 = (x73 & 0x3fffff);
-out[0] = x69;
-out[1] = x66;
-out[2] = x63;
-out[3] = x60;
-out[4] = x57;
-out[5] = x54;
-out[6] = x51;
-out[7] = x48;
-out[8] = x45;
-out[9] = x74 + x42;
-out[10] = x75;
-out[11] = x72;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x3 * ((uint64_t)x21 * x21)));
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x3 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + ((0x2 * ((uint64_t)x20 * x20)) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + ((0x2 * ((uint64_t)x18 * x20)) + ((0x2 * ((uint64_t)x20 * x18)) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+ { uint64_t x34 = (((uint64_t)x2 * x2) + (0x3 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
+ { uint32_t x35 = (uint32_t) (x34 >> 0x17);
+ { uint32_t x36 = ((uint32_t)x34 & 0x7fffff);
+ { uint64_t x37 = (x35 + x33);
+ { uint32_t x38 = (uint32_t) (x37 >> 0x16);
+ { uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+ { uint64_t x40 = (x38 + x32);
+ { uint32_t x41 = (uint32_t) (x40 >> 0x16);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint32_t x44 = (uint32_t) (x43 >> 0x16);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint32_t x47 = (uint32_t) (x46 >> 0x16);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x16);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x17);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+ { uint64_t x55 = (x53 + x27);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x16);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+ { uint64_t x58 = (x56 + x26);
+ { uint32_t x59 = (uint32_t) (x58 >> 0x16);
+ { uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+ { uint64_t x61 = (x59 + x25);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x16);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+ { uint64_t x64 = (x62 + x24);
+ { uint32_t x65 = (uint32_t) (x64 >> 0x16);
+ { uint32_t x66 = ((uint32_t)x64 & 0x3fffff);
+ { uint64_t x67 = (x65 + x23);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x16);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
+ { uint32_t x70 = (x36 + (0x3 * x68));
+ { uint32_t x71 = (x70 >> 0x17);
+ { uint32_t x72 = (x70 & 0x7fffff);
+ { uint32_t x73 = (x71 + x39);
+ { uint32_t x74 = (x73 >> 0x16);
+ { uint32_t x75 = (x73 & 0x3fffff);
+ out[0] = x72;
+ out[1] = x75;
+ out[2] = (x74 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ out[7] = x57;
+ out[8] = x60;
+ out[9] = x63;
+ out[10] = x66;
+ out[11] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e266m3/freeze.c b/src/Specific/solinas32_2e266m3/freeze.c
index fcba7bd38..fa92594a0 100644
--- a/src/Specific/solinas32_2e266m3/freeze.c
+++ b/src/Specific/solinas32_2e266m3/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffd;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffd);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0x3fffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0x3fffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0x3fffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x3fffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0x3fffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0x7fffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0x3fffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x3fffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0x3fffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0x3fffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0x3fffff);
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff);
+ { uint32_t x60 = (x59 & 0x7ffffd);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60);
+ { uint32_t x64 = (x59 & 0x3fffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0x3fffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0x3fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x3fffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0x3fffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0x7fffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0x3fffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x3fffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0x3fffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0x3fffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0x3fffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 22 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104);
+ out[0] = x62;
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e285m9/femul.c b/src/Specific/solinas32_2e285m9/femul.c
index 54099ec33..1dda6f3aa 100644
--- a/src/Specific/solinas32_2e285m9/femul.c
+++ b/src/Specific/solinas32_2e285m9/femul.c
@@ -1,101 +1,113 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
-{ uint64_t x60 = (((uint64_t)x5 * x58) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + (((uint64_t)x31 * x35) + ((uint64_t)x30 * x33)))))))))))))));
-{ uint64_t x61 = ((((uint64_t)x5 * x59) + (((uint64_t)x7 * x57) + (((uint64_t)x9 * x55) + (((uint64_t)x11 * x53) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + (((uint64_t)x29 * x35) + ((uint64_t)x31 * x33)))))))))))))) + (0x9 * ((uint64_t)x30 * x58)));
-{ uint64_t x62 = ((((uint64_t)x5 * x57) + (((uint64_t)x7 * x55) + (((uint64_t)x9 * x53) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + ((uint64_t)x29 * x33))))))))))))) + (0x9 * (((uint64_t)x31 * x58) + ((uint64_t)x30 * x59))));
-{ uint64_t x63 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x27 * x33)))))))))))) + (0x9 * (((uint64_t)x29 * x58) + (((uint64_t)x31 * x59) + ((uint64_t)x30 * x57)))));
-{ uint64_t x64 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + ((uint64_t)x25 * x33))))))))))) + (0x9 * (((uint64_t)x27 * x58) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + ((uint64_t)x30 * x55))))));
-{ uint64_t x65 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x23 * x33)))))))))) + (0x9 * (((uint64_t)x25 * x58) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + ((uint64_t)x30 * x53)))))));
-{ uint64_t x66 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + ((uint64_t)x21 * x33))))))))) + (0x9 * (((uint64_t)x23 * x58) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x30 * x51))))))));
-{ uint64_t x67 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((uint64_t)x19 * x33)))))))) + (0x9 * (((uint64_t)x21 * x58) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + ((uint64_t)x30 * x49)))))))));
-{ uint64_t x68 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33))))))) + (0x9 * (((uint64_t)x19 * x58) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + ((uint64_t)x30 * x47))))))))));
-{ uint64_t x69 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + ((uint64_t)x15 * x33)))))) + (0x9 * (((uint64_t)x17 * x58) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + ((uint64_t)x30 * x45)))))))))));
-{ uint64_t x70 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + ((uint64_t)x13 * x33))))) + (0x9 * (((uint64_t)x15 * x58) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + ((uint64_t)x30 * x43))))))))))));
-{ uint64_t x71 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + ((uint64_t)x11 * x33)))) + (0x9 * (((uint64_t)x13 * x58) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + ((uint64_t)x30 * x41)))))))))))));
-{ uint64_t x72 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + ((uint64_t)x9 * x33))) + (0x9 * (((uint64_t)x11 * x58) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + ((uint64_t)x30 * x39))))))))))))));
-{ uint64_t x73 = ((((uint64_t)x5 * x35) + ((uint64_t)x7 * x33)) + (0x9 * (((uint64_t)x9 * x58) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + ((uint64_t)x30 * x37)))))))))))))));
-{ uint64_t x74 = (((uint64_t)x5 * x33) + (0x9 * (((uint64_t)x7 * x58) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x30 * x35))))))))))))))));
-{ uint32_t x75 = (uint32_t) (x74 >> 0x13);
-{ uint32_t x76 = ((uint32_t)x74 & 0x7ffff);
-{ uint64_t x77 = (x75 + x73);
-{ uint32_t x78 = (uint32_t) (x77 >> 0x13);
-{ uint32_t x79 = ((uint32_t)x77 & 0x7ffff);
-{ uint64_t x80 = (x78 + x72);
-{ uint32_t x81 = (uint32_t) (x80 >> 0x13);
-{ uint32_t x82 = ((uint32_t)x80 & 0x7ffff);
-{ uint64_t x83 = (x81 + x71);
-{ uint32_t x84 = (uint32_t) (x83 >> 0x13);
-{ uint32_t x85 = ((uint32_t)x83 & 0x7ffff);
-{ uint64_t x86 = (x84 + x70);
-{ uint32_t x87 = (uint32_t) (x86 >> 0x13);
-{ uint32_t x88 = ((uint32_t)x86 & 0x7ffff);
-{ uint64_t x89 = (x87 + x69);
-{ uint32_t x90 = (uint32_t) (x89 >> 0x13);
-{ uint32_t x91 = ((uint32_t)x89 & 0x7ffff);
-{ uint64_t x92 = (x90 + x68);
-{ uint32_t x93 = (uint32_t) (x92 >> 0x13);
-{ uint32_t x94 = ((uint32_t)x92 & 0x7ffff);
-{ uint64_t x95 = (x93 + x67);
-{ uint32_t x96 = (uint32_t) (x95 >> 0x13);
-{ uint32_t x97 = ((uint32_t)x95 & 0x7ffff);
-{ uint64_t x98 = (x96 + x66);
-{ uint32_t x99 = (uint32_t) (x98 >> 0x13);
-{ uint32_t x100 = ((uint32_t)x98 & 0x7ffff);
-{ uint64_t x101 = (x99 + x65);
-{ uint32_t x102 = (uint32_t) (x101 >> 0x13);
-{ uint32_t x103 = ((uint32_t)x101 & 0x7ffff);
-{ uint64_t x104 = (x102 + x64);
-{ uint32_t x105 = (uint32_t) (x104 >> 0x13);
-{ uint32_t x106 = ((uint32_t)x104 & 0x7ffff);
-{ uint64_t x107 = (x105 + x63);
-{ uint32_t x108 = (uint32_t) (x107 >> 0x13);
-{ uint32_t x109 = ((uint32_t)x107 & 0x7ffff);
-{ uint64_t x110 = (x108 + x62);
-{ uint32_t x111 = (uint32_t) (x110 >> 0x13);
-{ uint32_t x112 = ((uint32_t)x110 & 0x7ffff);
-{ uint64_t x113 = (x111 + x61);
-{ uint32_t x114 = (uint32_t) (x113 >> 0x13);
-{ uint32_t x115 = ((uint32_t)x113 & 0x7ffff);
-{ uint64_t x116 = (x114 + x60);
-{ uint32_t x117 = (uint32_t) (x116 >> 0x13);
-{ uint32_t x118 = ((uint32_t)x116 & 0x7ffff);
-{ uint32_t x119 = (x76 + (0x9 * x117));
-{ uint32_t x120 = (x119 >> 0x13);
-{ uint32_t x121 = (x119 & 0x7ffff);
-{ uint32_t x122 = (x120 + x79);
-{ uint32_t x123 = (x122 >> 0x13);
-{ uint32_t x124 = (x122 & 0x7ffff);
-out[0] = x118;
-out[1] = x115;
-out[2] = x112;
-out[3] = x109;
-out[4] = x106;
-out[5] = x103;
-out[6] = x100;
-out[7] = x97;
-out[8] = x94;
-out[9] = x91;
-out[10] = x88;
-out[11] = x85;
-out[12] = x123 + x82;
-out[13] = x124;
-out[14] = x121;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void femul(uint32_t out[15], const uint32_t in1[15], const uint32_t in2[15]) { // Multiplies the 15-limb values in1 and in2 into out: builds 15 column sums of limb products, then carries at 19 bits (shift 0x13, mask 0x7ffff); products that overflow past column 14 are scaled by 9 and wrapped around.
+ { const uint32_t x30 = in1[14]; // x30..x5: limbs of in1, loaded from index 14 down to index 0
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x58 = in2[14]; // x58..x33: limbs of in2, loaded in the same high-to-low order
+ { const uint32_t x59 = in2[13];
+ { const uint32_t x57 = in2[12];
+ { const uint32_t x55 = in2[11];
+ { const uint32_t x53 = in2[10];
+ { const uint32_t x51 = in2[9];
+ { const uint32_t x49 = in2[8];
+ { const uint32_t x47 = in2[7];
+ { const uint32_t x45 = in2[6];
+ { const uint32_t x43 = in2[5];
+ { const uint32_t x41 = in2[4];
+ { const uint32_t x39 = in2[3];
+ { const uint32_t x37 = in2[2];
+ { const uint32_t x35 = in2[1];
+ { const uint32_t x33 = in2[0];
+ { uint64_t x60 = (((uint64_t)x5 * x58) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + (((uint64_t)x31 * x35) + ((uint64_t)x30 * x33))))))))))))))); // x60: column 14, every product in1[i]*in2[j] with i + j == 14, accumulated in 64 bits
+ { uint64_t x61 = ((((uint64_t)x5 * x59) + (((uint64_t)x7 * x57) + (((uint64_t)x9 * x55) + (((uint64_t)x11 * x53) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + (((uint64_t)x29 * x35) + ((uint64_t)x31 * x33)))))))))))))) + (0x9 * ((uint64_t)x30 * x58))); // x61: column 13, plus 9 * (the product with i + j == 28)
+ { uint64_t x62 = ((((uint64_t)x5 * x57) + (((uint64_t)x7 * x55) + (((uint64_t)x9 * x53) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + ((uint64_t)x29 * x33))))))))))))) + (0x9 * (((uint64_t)x31 * x58) + ((uint64_t)x30 * x59)))); // x62..x74: columns 12 down to 0, each adding 9 * (products with i + j == column + 15)
+ { uint64_t x63 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x27 * x33)))))))))))) + (0x9 * (((uint64_t)x29 * x58) + (((uint64_t)x31 * x59) + ((uint64_t)x30 * x57)))));
+ { uint64_t x64 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + ((uint64_t)x25 * x33))))))))))) + (0x9 * (((uint64_t)x27 * x58) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + ((uint64_t)x30 * x55))))));
+ { uint64_t x65 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x23 * x33)))))))))) + (0x9 * (((uint64_t)x25 * x58) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + ((uint64_t)x30 * x53)))))));
+ { uint64_t x66 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + ((uint64_t)x21 * x33))))))))) + (0x9 * (((uint64_t)x23 * x58) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x30 * x51))))))));
+ { uint64_t x67 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((uint64_t)x19 * x33)))))))) + (0x9 * (((uint64_t)x21 * x58) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + ((uint64_t)x30 * x49)))))))));
+ { uint64_t x68 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33))))))) + (0x9 * (((uint64_t)x19 * x58) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + ((uint64_t)x30 * x47))))))))));
+ { uint64_t x69 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + ((uint64_t)x15 * x33)))))) + (0x9 * (((uint64_t)x17 * x58) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + ((uint64_t)x30 * x45)))))))))));
+ { uint64_t x70 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + ((uint64_t)x13 * x33))))) + (0x9 * (((uint64_t)x15 * x58) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + ((uint64_t)x30 * x43))))))))))));
+ { uint64_t x71 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + ((uint64_t)x11 * x33)))) + (0x9 * (((uint64_t)x13 * x58) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + ((uint64_t)x30 * x41)))))))))))));
+ { uint64_t x72 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + ((uint64_t)x9 * x33))) + (0x9 * (((uint64_t)x11 * x58) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + ((uint64_t)x30 * x39))))))))))))));
+ { uint64_t x73 = ((((uint64_t)x5 * x35) + ((uint64_t)x7 * x33)) + (0x9 * (((uint64_t)x9 * x58) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + ((uint64_t)x30 * x37)))))))))))))));
+ { uint64_t x74 = (((uint64_t)x5 * x33) + (0x9 * (((uint64_t)x7 * x58) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x30 * x35))))))))))))))));
+ { uint32_t x75 = (uint32_t) (x74 >> 0x13); // carry chain starts at column 0: x75 = everything above the low 19 bits of x74
+ { uint32_t x76 = ((uint32_t)x74 & 0x7ffff); // x76 = low 19 bits of column 0
+ { uint64_t x77 = (x75 + x73); // carry added into column 1; the same shift / mask / add step repeats through x116
+ { uint32_t x78 = (uint32_t) (x77 >> 0x13);
+ { uint32_t x79 = ((uint32_t)x77 & 0x7ffff);
+ { uint64_t x80 = (x78 + x72);
+ { uint32_t x81 = (uint32_t) (x80 >> 0x13);
+ { uint32_t x82 = ((uint32_t)x80 & 0x7ffff);
+ { uint64_t x83 = (x81 + x71);
+ { uint32_t x84 = (uint32_t) (x83 >> 0x13);
+ { uint32_t x85 = ((uint32_t)x83 & 0x7ffff);
+ { uint64_t x86 = (x84 + x70);
+ { uint32_t x87 = (uint32_t) (x86 >> 0x13);
+ { uint32_t x88 = ((uint32_t)x86 & 0x7ffff);
+ { uint64_t x89 = (x87 + x69);
+ { uint32_t x90 = (uint32_t) (x89 >> 0x13);
+ { uint32_t x91 = ((uint32_t)x89 & 0x7ffff);
+ { uint64_t x92 = (x90 + x68);
+ { uint32_t x93 = (uint32_t) (x92 >> 0x13);
+ { uint32_t x94 = ((uint32_t)x92 & 0x7ffff);
+ { uint64_t x95 = (x93 + x67);
+ { uint32_t x96 = (uint32_t) (x95 >> 0x13);
+ { uint32_t x97 = ((uint32_t)x95 & 0x7ffff);
+ { uint64_t x98 = (x96 + x66);
+ { uint32_t x99 = (uint32_t) (x98 >> 0x13);
+ { uint32_t x100 = ((uint32_t)x98 & 0x7ffff);
+ { uint64_t x101 = (x99 + x65);
+ { uint32_t x102 = (uint32_t) (x101 >> 0x13);
+ { uint32_t x103 = ((uint32_t)x101 & 0x7ffff);
+ { uint64_t x104 = (x102 + x64);
+ { uint32_t x105 = (uint32_t) (x104 >> 0x13);
+ { uint32_t x106 = ((uint32_t)x104 & 0x7ffff);
+ { uint64_t x107 = (x105 + x63);
+ { uint32_t x108 = (uint32_t) (x107 >> 0x13);
+ { uint32_t x109 = ((uint32_t)x107 & 0x7ffff);
+ { uint64_t x110 = (x108 + x62);
+ { uint32_t x111 = (uint32_t) (x110 >> 0x13);
+ { uint32_t x112 = ((uint32_t)x110 & 0x7ffff);
+ { uint64_t x113 = (x111 + x61);
+ { uint32_t x114 = (uint32_t) (x113 >> 0x13);
+ { uint32_t x115 = ((uint32_t)x113 & 0x7ffff);
+ { uint64_t x116 = (x114 + x60);
+ { uint32_t x117 = (uint32_t) (x116 >> 0x13); // x117 = carry out of the top column
+ { uint32_t x118 = ((uint32_t)x116 & 0x7ffff);
+ { uint32_t x119 = (x76 + (0x9 * x117)); // top carry is multiplied by 9 and folded back into limb 0 (32-bit arithmetic from here on)
+ { uint32_t x120 = (x119 >> 0x13); // second, short carry pass over limbs 0 and 1 only
+ { uint32_t x121 = (x119 & 0x7ffff);
+ { uint32_t x122 = (x120 + x79);
+ { uint32_t x123 = (x122 >> 0x13);
+ { uint32_t x124 = (x122 & 0x7ffff);
+ out[0] = x121;
+ out[1] = x124;
+ out[2] = (x123 + x82); // limb 2 absorbs the last carry and is not masked to 19 bits afterwards
+ out[3] = x85;
+ out[4] = x88;
+ out[5] = x91;
+ out[6] = x94;
+ out[7] = x97;
+ out[8] = x100;
+ out[9] = x103;
+ out[10] = x106;
+ out[11] = x109;
+ out[12] = x112;
+ out[13] = x115;
+ out[14] = x118;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e285m9/fesquare.c b/src/Specific/solinas32_2e285m9/fesquare.c
index 729af2c0a..ecb5f11c8 100644
--- a/src/Specific/solinas32_2e285m9/fesquare.c
+++ b/src/Specific/solinas32_2e285m9/fesquare.c
@@ -1,101 +1,98 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x29 = (((uint64_t)x2 * x27) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x27 * x2)))))))))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) + (0x9 * ((uint64_t)x27 * x27)));
-{ uint64_t x31 = ((((uint64_t)x2 * x26) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x26 * x2))))))))))))) + (0x9 * (((uint64_t)x28 * x27) + ((uint64_t)x27 * x28))));
-{ uint64_t x32 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x9 * (((uint64_t)x26 * x27) + (((uint64_t)x28 * x28) + ((uint64_t)x27 * x26)))));
-{ uint64_t x33 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x9 * (((uint64_t)x24 * x27) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + ((uint64_t)x27 * x24))))));
-{ uint64_t x34 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x9 * (((uint64_t)x22 * x27) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + ((uint64_t)x27 * x22)))))));
-{ uint64_t x35 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x9 * (((uint64_t)x20 * x27) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + ((uint64_t)x27 * x20))))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x27) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x27 * x18)))))))));
-{ uint64_t x37 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x27) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + ((uint64_t)x27 * x16))))))))));
-{ uint64_t x38 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x27) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + ((uint64_t)x27 * x14)))))))))));
-{ uint64_t x39 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x27) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + ((uint64_t)x27 * x12))))))))))));
-{ uint64_t x40 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x27) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + ((uint64_t)x27 * x10)))))))))))));
-{ uint64_t x41 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x27) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + ((uint64_t)x27 * x8))))))))))))));
-{ uint64_t x42 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x27) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + ((uint64_t)x27 * x6)))))))))))))));
-{ uint64_t x43 = (((uint64_t)x2 * x2) + (0x9 * (((uint64_t)x4 * x27) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + ((uint64_t)x27 * x4))))))))))))))));
-{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
-{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
-{ uint64_t x46 = (x44 + x42);
-{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
-{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
-{ uint64_t x49 = (x47 + x41);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x13);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
-{ uint64_t x52 = (x50 + x40);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x13);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
-{ uint64_t x55 = (x53 + x39);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x13);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
-{ uint64_t x58 = (x56 + x38);
-{ uint32_t x59 = (uint32_t) (x58 >> 0x13);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7ffff);
-{ uint64_t x61 = (x59 + x37);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x13);
-{ uint32_t x63 = ((uint32_t)x61 & 0x7ffff);
-{ uint64_t x64 = (x62 + x36);
-{ uint32_t x65 = (uint32_t) (x64 >> 0x13);
-{ uint32_t x66 = ((uint32_t)x64 & 0x7ffff);
-{ uint64_t x67 = (x65 + x35);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x13);
-{ uint32_t x69 = ((uint32_t)x67 & 0x7ffff);
-{ uint64_t x70 = (x68 + x34);
-{ uint32_t x71 = (uint32_t) (x70 >> 0x13);
-{ uint32_t x72 = ((uint32_t)x70 & 0x7ffff);
-{ uint64_t x73 = (x71 + x33);
-{ uint32_t x74 = (uint32_t) (x73 >> 0x13);
-{ uint32_t x75 = ((uint32_t)x73 & 0x7ffff);
-{ uint64_t x76 = (x74 + x32);
-{ uint32_t x77 = (uint32_t) (x76 >> 0x13);
-{ uint32_t x78 = ((uint32_t)x76 & 0x7ffff);
-{ uint64_t x79 = (x77 + x31);
-{ uint32_t x80 = (uint32_t) (x79 >> 0x13);
-{ uint32_t x81 = ((uint32_t)x79 & 0x7ffff);
-{ uint64_t x82 = (x80 + x30);
-{ uint32_t x83 = (uint32_t) (x82 >> 0x13);
-{ uint32_t x84 = ((uint32_t)x82 & 0x7ffff);
-{ uint64_t x85 = (x83 + x29);
-{ uint32_t x86 = (uint32_t) (x85 >> 0x13);
-{ uint32_t x87 = ((uint32_t)x85 & 0x7ffff);
-{ uint32_t x88 = (x45 + (0x9 * x86));
-{ uint32_t x89 = (x88 >> 0x13);
-{ uint32_t x90 = (x88 & 0x7ffff);
-{ uint32_t x91 = (x89 + x48);
-{ uint32_t x92 = (x91 >> 0x13);
-{ uint32_t x93 = (x91 & 0x7ffff);
-out[0] = x87;
-out[1] = x84;
-out[2] = x81;
-out[3] = x78;
-out[4] = x75;
-out[5] = x72;
-out[6] = x69;
-out[7] = x66;
-out[8] = x63;
-out[9] = x60;
-out[10] = x57;
-out[11] = x54;
-out[12] = x92 + x51;
-out[13] = x93;
-out[14] = x90;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[15];
+static void fesquare(uint32_t out[15], const uint32_t in1[15]) { // Squares the 15-limb value in1 into out: the same column sums as a general multiply with both operands equal to in1, followed by a 19-bit carry chain (shift 0x13, mask 0x7ffff); wrapped products are scaled by 9.
+ { const uint32_t x27 = in1[14]; // x27..x2: limbs of in1, loaded from index 14 down to index 0
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x29 = (((uint64_t)x2 * x27) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x27 * x2))))))))))))))); // x29: column 14, every product in1[i]*in1[j] with i + j == 14; symmetric pairs are written out twice rather than doubled
+ { uint64_t x30 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) + (0x9 * ((uint64_t)x27 * x27))); // x30: column 13, plus 9 * (the product with i + j == 28)
+ { uint64_t x31 = ((((uint64_t)x2 * x26) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x26 * x2))))))))))))) + (0x9 * (((uint64_t)x28 * x27) + ((uint64_t)x27 * x28)))); // x31..x43: columns 12 down to 0, each adding 9 * (products with i + j == column + 15)
+ { uint64_t x32 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x9 * (((uint64_t)x26 * x27) + (((uint64_t)x28 * x28) + ((uint64_t)x27 * x26)))));
+ { uint64_t x33 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x9 * (((uint64_t)x24 * x27) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + ((uint64_t)x27 * x24))))));
+ { uint64_t x34 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x9 * (((uint64_t)x22 * x27) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + ((uint64_t)x27 * x22)))))));
+ { uint64_t x35 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x9 * (((uint64_t)x20 * x27) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + ((uint64_t)x27 * x20))))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x27) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x27 * x18)))))))));
+ { uint64_t x37 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x27) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + ((uint64_t)x27 * x16))))))))));
+ { uint64_t x38 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x27) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + ((uint64_t)x27 * x14)))))))))));
+ { uint64_t x39 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x27) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + ((uint64_t)x27 * x12))))))))))));
+ { uint64_t x40 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x27) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + ((uint64_t)x27 * x10)))))))))))));
+ { uint64_t x41 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x27) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + ((uint64_t)x27 * x8))))))))))))));
+ { uint64_t x42 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x27) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + ((uint64_t)x27 * x6)))))))))))))));
+ { uint64_t x43 = (((uint64_t)x2 * x2) + (0x9 * (((uint64_t)x4 * x27) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + ((uint64_t)x27 * x4))))))))))))))));
+ { uint32_t x44 = (uint32_t) (x43 >> 0x13); // carry chain starts at column 0: x44 = everything above the low 19 bits of x43
+ { uint32_t x45 = ((uint32_t)x43 & 0x7ffff); // x45 = low 19 bits of column 0
+ { uint64_t x46 = (x44 + x42); // carry added into column 1; the same shift / mask / add step repeats through x85
+ { uint32_t x47 = (uint32_t) (x46 >> 0x13);
+ { uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+ { uint64_t x49 = (x47 + x41);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x13);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+ { uint64_t x52 = (x50 + x40);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x13);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
+ { uint64_t x55 = (x53 + x39);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x13);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
+ { uint64_t x58 = (x56 + x38);
+ { uint32_t x59 = (uint32_t) (x58 >> 0x13);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7ffff);
+ { uint64_t x61 = (x59 + x37);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x13);
+ { uint32_t x63 = ((uint32_t)x61 & 0x7ffff);
+ { uint64_t x64 = (x62 + x36);
+ { uint32_t x65 = (uint32_t) (x64 >> 0x13);
+ { uint32_t x66 = ((uint32_t)x64 & 0x7ffff);
+ { uint64_t x67 = (x65 + x35);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x13);
+ { uint32_t x69 = ((uint32_t)x67 & 0x7ffff);
+ { uint64_t x70 = (x68 + x34);
+ { uint32_t x71 = (uint32_t) (x70 >> 0x13);
+ { uint32_t x72 = ((uint32_t)x70 & 0x7ffff);
+ { uint64_t x73 = (x71 + x33);
+ { uint32_t x74 = (uint32_t) (x73 >> 0x13);
+ { uint32_t x75 = ((uint32_t)x73 & 0x7ffff);
+ { uint64_t x76 = (x74 + x32);
+ { uint32_t x77 = (uint32_t) (x76 >> 0x13);
+ { uint32_t x78 = ((uint32_t)x76 & 0x7ffff);
+ { uint64_t x79 = (x77 + x31);
+ { uint32_t x80 = (uint32_t) (x79 >> 0x13);
+ { uint32_t x81 = ((uint32_t)x79 & 0x7ffff);
+ { uint64_t x82 = (x80 + x30);
+ { uint32_t x83 = (uint32_t) (x82 >> 0x13);
+ { uint32_t x84 = ((uint32_t)x82 & 0x7ffff);
+ { uint64_t x85 = (x83 + x29);
+ { uint32_t x86 = (uint32_t) (x85 >> 0x13); // x86 = carry out of the top column
+ { uint32_t x87 = ((uint32_t)x85 & 0x7ffff);
+ { uint32_t x88 = (x45 + (0x9 * x86)); // top carry is multiplied by 9 and folded back into limb 0 (32-bit arithmetic from here on)
+ { uint32_t x89 = (x88 >> 0x13); // second, short carry pass over limbs 0 and 1 only
+ { uint32_t x90 = (x88 & 0x7ffff);
+ { uint32_t x91 = (x89 + x48);
+ { uint32_t x92 = (x91 >> 0x13);
+ { uint32_t x93 = (x91 & 0x7ffff);
+ out[0] = x90;
+ out[1] = x93;
+ out[2] = (x92 + x51); // limb 2 absorbs the last carry and is not masked to 19 bits afterwards
+ out[3] = x54;
+ out[4] = x57;
+ out[5] = x60;
+ out[6] = x63;
+ out[7] = x66;
+ out[8] = x69;
+ out[9] = x72;
+ out[10] = x75;
+ out[11] = x78;
+ out[12] = x81;
+ out[13] = x84;
+ out[14] = x87;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e285m9/freeze.c b/src/Specific/solinas32_2e285m9/freeze.c
index 04cab1e28..afe1b2ee2 100644
--- a/src/Specific/solinas32_2e285m9/freeze.c
+++ b/src/Specific/solinas32_2e285m9/freeze.c
@@ -1,25 +1,79 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x30;
-out[1] = uint8_t x31 = Op Syntax.SubWithGetBorrow 19 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fff7;;
+static void freeze(uint32_t out[15], const uint32_t in1[15]) { // Subtracts the constant limbs (0x7fff7, then 0x7ffff x 14) from in1 with a borrow chain, then adds them back under a mask derived from the final borrow, writing 15 limbs to out. NOTE(review): the "Op (Syntax...)" lines below are not valid C; they look like generator operations that were never rendered to C, so confirm before compiling.
+ { const uint32_t x27 = in1[14]; // x27..x2: limbs of in1, loaded from index 14 down to index 0
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fff7); // limb 0: arguments are (0x0, x2, 0x7fff7) with 0x7fff7 == 2^19 - 9; presumably (borrow-in, minuend, subtrahend) giving result x30 and borrow-out x31, verify against the generator
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x4, 0x7ffff); // limbs 1..14: each step takes the previous step's second output and the constant 0x7ffff (== 2^19 - 1)
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x6, 0x7ffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x8, 0x7ffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x10, 0x7ffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x12, 0x7ffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x14, 0x7ffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x16, 0x7ffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x18, 0x7ffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x20, 0x7ffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x22, 0x7ffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x24, 0x7ffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x26, 0x7ffff);
+ { uint32_t x69, uint8_t x70 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x28, 0x7ffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x70, Return x27, 0x7ffff); // x73: second output of the top-limb step (presumably the final borrow)
+ { uint32_t x74 = (uint32_t)cmovznz(x73, 0x0, 0xffffffff); // cmovznz is defined elsewhere; presumably yields 0x0 when x73 == 0 and 0xffffffff otherwise, verify
+ { uint32_t x75 = (x74 & 0x7fff7); // masked limb-0 constant: either 0 or 0x7fff7 if x74 is all-zeros or all-ones
+ { uint32_t x77, uint8_t x78 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x30, Return x75); // add-back chain, limb 0: arguments are (0x0, x30, x75); presumably (carry-in, addend, addend) giving sum x77 and carry-out x78
+ { uint32_t x79 = (x74 & 0x7ffff); // the same mask-then-add pair repeats for limbs 1..14 with the constant 0x7ffff
+ { uint32_t x81, uint8_t x82 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x78, Return x33, Return x79);
+ { uint32_t x83 = (x74 & 0x7ffff);
+ { uint32_t x85, uint8_t x86 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x36, Return x83);
+ { uint32_t x87 = (x74 & 0x7ffff);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x86, Return x39, Return x87);
+ { uint32_t x91 = (x74 & 0x7ffff);
+ { uint32_t x93, uint8_t x94 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x42, Return x91);
+ { uint32_t x95 = (x74 & 0x7ffff);
+ { uint32_t x97, uint8_t x98 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x94, Return x45, Return x95);
+ { uint32_t x99 = (x74 & 0x7ffff);
+ { uint32_t x101, uint8_t x102 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x98, Return x48, Return x99);
+ { uint32_t x103 = (x74 & 0x7ffff);
+ { uint32_t x105, uint8_t x106 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x102, Return x51, Return x103);
+ { uint32_t x107 = (x74 & 0x7ffff);
+ { uint32_t x109, uint8_t x110 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x106, Return x54, Return x107);
+ { uint32_t x111 = (x74 & 0x7ffff);
+ { uint32_t x113, uint8_t x114 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x110, Return x57, Return x111);
+ { uint32_t x115 = (x74 & 0x7ffff);
+ { uint32_t x117, uint8_t x118 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x114, Return x60, Return x115);
+ { uint32_t x119 = (x74 & 0x7ffff);
+ { uint32_t x121, uint8_t x122 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x118, Return x63, Return x119);
+ { uint32_t x123 = (x74 & 0x7ffff);
+ { uint32_t x125, uint8_t x126 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x122, Return x66, Return x123);
+ { uint32_t x127 = (x74 & 0x7ffff);
+ { uint32_t x129, uint8_t x130 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x126, Return x69, Return x127);
+ { uint32_t x131 = (x74 & 0x7ffff);
+ { uint32_t x133, uint8_t _ = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x130, Return x72, Return x131); // the second output of the last step is bound to _ and never used
+ out[0] = x77; // out[i] receives the add-back result for limb i
+ out[1] = x81;
+ out[2] = x85;
+ out[3] = x89;
+ out[4] = x93;
+ out[5] = x97;
+ out[6] = x101;
+ out[7] = x105;
+ out[8] = x109;
+ out[9] = x113;
+ out[10] = x117;
+ out[11] = x121;
+ out[12] = x125;
+ out[13] = x129;
+ out[14] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e291m19/femul.c b/src/Specific/solinas32_2e291m19/femul.c
index 204f8c0ab..9fdc08fa1 100644
--- a/src/Specific/solinas32_2e291m19/femul.c
+++ b/src/Specific/solinas32_2e291m19/femul.c
@@ -1,86 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27))))))))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x13 * ((uint64_t)x24 * x46)));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x13 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x13 * ((0x2 * ((uint64_t)x23 * x46)) + ((0x2 * ((uint64_t)x25 * x47)) + (0x2 * ((uint64_t)x24 * x45))))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x19 * x27)))))))) + (0x13 * (((uint64_t)x21 * x46) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((uint64_t)x24 * x43))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x13 * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
-{ uint64_t x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) + (0x13 * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
-{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x13 * ((0x2 * ((uint64_t)x15 * x46)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + (0x2 * ((uint64_t)x24 * x37))))))))));
-{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x13 * (((uint64_t)x13 * x46) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x24 * x35))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x13 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x13 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
-{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x13 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
-{ uint64_t x60 = (x59 >> 0x19);
-{ uint32_t x61 = ((uint32_t)x59 & 0x1ffffff);
-{ uint64_t x62 = (x60 + x58);
-{ uint64_t x63 = (x62 >> 0x18);
-{ uint32_t x64 = ((uint32_t)x62 & 0xffffff);
-{ uint64_t x65 = (x63 + x57);
-{ uint64_t x66 = (x65 >> 0x18);
-{ uint32_t x67 = ((uint32_t)x65 & 0xffffff);
-{ uint64_t x68 = (x66 + x56);
-{ uint64_t x69 = (x68 >> 0x18);
-{ uint32_t x70 = ((uint32_t)x68 & 0xffffff);
-{ uint64_t x71 = (x69 + x55);
-{ uint64_t x72 = (x71 >> 0x19);
-{ uint32_t x73 = ((uint32_t)x71 & 0x1ffffff);
-{ uint64_t x74 = (x72 + x54);
-{ uint64_t x75 = (x74 >> 0x18);
-{ uint32_t x76 = ((uint32_t)x74 & 0xffffff);
-{ uint64_t x77 = (x75 + x53);
-{ uint64_t x78 = (x77 >> 0x18);
-{ uint32_t x79 = ((uint32_t)x77 & 0xffffff);
-{ uint64_t x80 = (x78 + x52);
-{ uint64_t x81 = (x80 >> 0x18);
-{ uint32_t x82 = ((uint32_t)x80 & 0xffffff);
-{ uint64_t x83 = (x81 + x51);
-{ uint64_t x84 = (x83 >> 0x19);
-{ uint32_t x85 = ((uint32_t)x83 & 0x1ffffff);
-{ uint64_t x86 = (x84 + x50);
-{ uint64_t x87 = (x86 >> 0x18);
-{ uint32_t x88 = ((uint32_t)x86 & 0xffffff);
-{ uint64_t x89 = (x87 + x49);
-{ uint64_t x90 = (x89 >> 0x18);
-{ uint32_t x91 = ((uint32_t)x89 & 0xffffff);
-{ uint64_t x92 = (x90 + x48);
-{ uint64_t x93 = (x92 >> 0x18);
-{ uint32_t x94 = ((uint32_t)x92 & 0xffffff);
-{ uint64_t x95 = (x61 + (0x13 * x93));
-{ uint32_t x96 = (uint32_t) (x95 >> 0x19);
-{ uint32_t x97 = ((uint32_t)x95 & 0x1ffffff);
-{ uint32_t x98 = (x96 + x64);
-{ uint32_t x99 = (x98 >> 0x18);
-{ uint32_t x100 = (x98 & 0xffffff);
-out[0] = x94;
-out[1] = x91;
-out[2] = x88;
-out[3] = x85;
-out[4] = x82;
-out[5] = x79;
-out[6] = x76;
-out[7] = x73;
-out[8] = x70;
-out[9] = x99 + x67;
-out[10] = x100;
-out[11] = x97;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) { /* Multiplies in1 by in2 limb-wise and writes 12 carried limbs to out. Index 0 is the lowest limb; per the shifts and masks below, limbs 0, 4 and 8 are 25 bits wide and the others 24. NOTE(review): the 0x13 wrap factor and the directory name suggest arithmetic mod 2^291 - 19 -- confirm. */
+ { const uint32_t x24 = in1[11]; /* in1 limbs are loaded highest index first: x24 = in1[11] ... x5 = in1[0] */
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11]; /* in2 limbs, same order: x46 = in2[11] ... x27 = in2[0] */
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); /* x48..x59 are 64-bit column sums for output positions 11 down to 0; some cross terms carry an extra factor 0x2 */
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x13 * ((uint64_t)x24 * x46))); /* from here on, products whose limb indices sum past 11 are folded back in scaled by 0x13 */
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x13 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x13 * ((0x2 * ((uint64_t)x23 * x46)) + ((0x2 * ((uint64_t)x25 * x47)) + (0x2 * ((uint64_t)x24 * x45))))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x19 * x27)))))))) + (0x13 * (((uint64_t)x21 * x46) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((uint64_t)x24 * x43))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x13 * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
+ { uint64_t x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) + (0x13 * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
+ { uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x13 * ((0x2 * ((uint64_t)x15 * x46)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + (0x2 * ((uint64_t)x24 * x37))))))))));
+ { uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x13 * (((uint64_t)x13 * x46) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x24 * x35))))))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x13 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x13 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+ { uint64_t x59 = (((uint64_t)x5 * x27) + (0x13 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); /* column 0: the only unscaled term is in1[0] * in2[0] */
+ { uint64_t x60 = (x59 >> 0x19); /* carry chain starts at column 0: bits above the limb width (0x19 or 0x18) move into the next column */
+ { uint32_t x61 = ((uint32_t)x59 & 0x1ffffff); /* low 25 bits of column 0, before the wrap-around carry is added */
+ { uint64_t x62 = (x60 + x58);
+ { uint64_t x63 = (x62 >> 0x18);
+ { uint32_t x64 = ((uint32_t)x62 & 0xffffff);
+ { uint64_t x65 = (x63 + x57);
+ { uint64_t x66 = (x65 >> 0x18);
+ { uint32_t x67 = ((uint32_t)x65 & 0xffffff);
+ { uint64_t x68 = (x66 + x56);
+ { uint64_t x69 = (x68 >> 0x18);
+ { uint32_t x70 = ((uint32_t)x68 & 0xffffff);
+ { uint64_t x71 = (x69 + x55);
+ { uint64_t x72 = (x71 >> 0x19);
+ { uint32_t x73 = ((uint32_t)x71 & 0x1ffffff);
+ { uint64_t x74 = (x72 + x54);
+ { uint64_t x75 = (x74 >> 0x18);
+ { uint32_t x76 = ((uint32_t)x74 & 0xffffff);
+ { uint64_t x77 = (x75 + x53);
+ { uint64_t x78 = (x77 >> 0x18);
+ { uint32_t x79 = ((uint32_t)x77 & 0xffffff);
+ { uint64_t x80 = (x78 + x52);
+ { uint64_t x81 = (x80 >> 0x18);
+ { uint32_t x82 = ((uint32_t)x80 & 0xffffff);
+ { uint64_t x83 = (x81 + x51);
+ { uint64_t x84 = (x83 >> 0x19);
+ { uint32_t x85 = ((uint32_t)x83 & 0x1ffffff);
+ { uint64_t x86 = (x84 + x50);
+ { uint64_t x87 = (x86 >> 0x18);
+ { uint32_t x88 = ((uint32_t)x86 & 0xffffff);
+ { uint64_t x89 = (x87 + x49);
+ { uint64_t x90 = (x89 >> 0x18);
+ { uint32_t x91 = ((uint32_t)x89 & 0xffffff);
+ { uint64_t x92 = (x90 + x48);
+ { uint64_t x93 = (x92 >> 0x18); /* carry out of the top limb */
+ { uint32_t x94 = ((uint32_t)x92 & 0xffffff);
+ { uint64_t x95 = (x61 + (0x13 * x93)); /* the top carry re-enters limb 0 multiplied by 0x13 */
+ { uint32_t x96 = (uint32_t) (x95 >> 0x19);
+ { uint32_t x97 = ((uint32_t)x95 & 0x1ffffff);
+ { uint32_t x98 = (x96 + x64); /* second carry step: limb 0 overflow into limb 1 */
+ { uint32_t x99 = (x98 >> 0x18);
+ { uint32_t x100 = (x98 & 0xffffff);
+ out[0] = x97; /* results are stored lowest limb first */
+ out[1] = x100;
+ out[2] = (x99 + x67); /* the carry from limb 1 is added without a further mask */
+ out[3] = x70;
+ out[4] = x73;
+ out[5] = x76;
+ out[6] = x79;
+ out[7] = x82;
+ out[8] = x85;
+ out[9] = x88;
+ out[10] = x91;
+ out[11] = x94;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e291m19/fesquare.c b/src/Specific/solinas32_2e291m19/fesquare.c
index 49f5a676c..5cfc15536 100644
--- a/src/Specific/solinas32_2e291m19/fesquare.c
+++ b/src/Specific/solinas32_2e291m19/fesquare.c
@@ -1,86 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2))))))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x13 * ((uint64_t)x21 * x21)));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x13 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * ((0x2 * ((uint64_t)x20 * x21)) + ((0x2 * ((uint64_t)x22 * x22)) + (0x2 * ((uint64_t)x21 * x20))))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x21) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + ((uint64_t)x21 * x18))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
-{ uint64_t x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * ((0x2 * ((uint64_t)x12 * x21)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + (0x2 * ((uint64_t)x21 * x12))))))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x21) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((uint64_t)x21 * x10))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
-{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
-{ uint64_t x35 = (x34 >> 0x19);
-{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
-{ uint64_t x37 = (x35 + x33);
-{ uint64_t x38 = (x37 >> 0x18);
-{ uint32_t x39 = ((uint32_t)x37 & 0xffffff);
-{ uint64_t x40 = (x38 + x32);
-{ uint64_t x41 = (x40 >> 0x18);
-{ uint32_t x42 = ((uint32_t)x40 & 0xffffff);
-{ uint64_t x43 = (x41 + x31);
-{ uint64_t x44 = (x43 >> 0x18);
-{ uint32_t x45 = ((uint32_t)x43 & 0xffffff);
-{ uint64_t x46 = (x44 + x30);
-{ uint64_t x47 = (x46 >> 0x19);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
-{ uint64_t x49 = (x47 + x29);
-{ uint64_t x50 = (x49 >> 0x18);
-{ uint32_t x51 = ((uint32_t)x49 & 0xffffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x18);
-{ uint32_t x54 = ((uint32_t)x52 & 0xffffff);
-{ uint64_t x55 = (x53 + x27);
-{ uint64_t x56 = (x55 >> 0x18);
-{ uint32_t x57 = ((uint32_t)x55 & 0xffffff);
-{ uint64_t x58 = (x56 + x26);
-{ uint64_t x59 = (x58 >> 0x19);
-{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
-{ uint64_t x61 = (x59 + x25);
-{ uint64_t x62 = (x61 >> 0x18);
-{ uint32_t x63 = ((uint32_t)x61 & 0xffffff);
-{ uint64_t x64 = (x62 + x24);
-{ uint64_t x65 = (x64 >> 0x18);
-{ uint32_t x66 = ((uint32_t)x64 & 0xffffff);
-{ uint64_t x67 = (x65 + x23);
-{ uint64_t x68 = (x67 >> 0x18);
-{ uint32_t x69 = ((uint32_t)x67 & 0xffffff);
-{ uint64_t x70 = (x36 + (0x13 * x68));
-{ uint32_t x71 = (uint32_t) (x70 >> 0x19);
-{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
-{ uint32_t x73 = (x71 + x39);
-{ uint32_t x74 = (x73 >> 0x18);
-{ uint32_t x75 = (x73 & 0xffffff);
-out[0] = x69;
-out[1] = x66;
-out[2] = x63;
-out[3] = x60;
-out[4] = x57;
-out[5] = x54;
-out[6] = x51;
-out[7] = x48;
-out[8] = x45;
-out[9] = x74 + x42;
-out[10] = x75;
-out[11] = x72;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) { /* Squares in1 and writes 12 carried limbs to out. Index 0 is the lowest limb; per the shifts and masks below, limbs 0, 4 and 8 are 25 bits wide and the others 24. NOTE(review): the 0x13 wrap factor and the directory name suggest arithmetic mod 2^291 - 19 -- confirm. */
+ { const uint32_t x21 = in1[11]; /* limbs are loaded highest index first: x21 = in1[11] ... x2 = in1[0] */
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); /* x23..x34 are 64-bit column sums for output positions 11 down to 0; symmetric products (e.g. x2*x21 and x21*x2) are written out separately rather than merged */
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x13 * ((uint64_t)x21 * x21))); /* from here on, products whose limb indices sum past 11 are folded back in scaled by 0x13 */
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x13 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * ((0x2 * ((uint64_t)x20 * x21)) + ((0x2 * ((uint64_t)x22 * x22)) + (0x2 * ((uint64_t)x21 * x20))))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x21) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + ((uint64_t)x21 * x18))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
+ { uint64_t x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * ((0x2 * ((uint64_t)x12 * x21)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + (0x2 * ((uint64_t)x21 * x12))))))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x21) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((uint64_t)x21 * x10))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+ { uint64_t x34 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); /* column 0: the only unscaled term is in1[0] * in1[0] */
+ { uint64_t x35 = (x34 >> 0x19); /* carry chain starts at column 0: bits above the limb width (0x19 or 0x18) move into the next column */
+ { uint32_t x36 = ((uint32_t)x34 & 0x1ffffff); /* low 25 bits of column 0, before the wrap-around carry is added */
+ { uint64_t x37 = (x35 + x33);
+ { uint64_t x38 = (x37 >> 0x18);
+ { uint32_t x39 = ((uint32_t)x37 & 0xffffff);
+ { uint64_t x40 = (x38 + x32);
+ { uint64_t x41 = (x40 >> 0x18);
+ { uint32_t x42 = ((uint32_t)x40 & 0xffffff);
+ { uint64_t x43 = (x41 + x31);
+ { uint64_t x44 = (x43 >> 0x18);
+ { uint32_t x45 = ((uint32_t)x43 & 0xffffff);
+ { uint64_t x46 = (x44 + x30);
+ { uint64_t x47 = (x46 >> 0x19);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
+ { uint64_t x49 = (x47 + x29);
+ { uint64_t x50 = (x49 >> 0x18);
+ { uint32_t x51 = ((uint32_t)x49 & 0xffffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x18);
+ { uint32_t x54 = ((uint32_t)x52 & 0xffffff);
+ { uint64_t x55 = (x53 + x27);
+ { uint64_t x56 = (x55 >> 0x18);
+ { uint32_t x57 = ((uint32_t)x55 & 0xffffff);
+ { uint64_t x58 = (x56 + x26);
+ { uint64_t x59 = (x58 >> 0x19);
+ { uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+ { uint64_t x61 = (x59 + x25);
+ { uint64_t x62 = (x61 >> 0x18);
+ { uint32_t x63 = ((uint32_t)x61 & 0xffffff);
+ { uint64_t x64 = (x62 + x24);
+ { uint64_t x65 = (x64 >> 0x18);
+ { uint32_t x66 = ((uint32_t)x64 & 0xffffff);
+ { uint64_t x67 = (x65 + x23);
+ { uint64_t x68 = (x67 >> 0x18); /* carry out of the top limb */
+ { uint32_t x69 = ((uint32_t)x67 & 0xffffff);
+ { uint64_t x70 = (x36 + (0x13 * x68)); /* the top carry re-enters limb 0 multiplied by 0x13 */
+ { uint32_t x71 = (uint32_t) (x70 >> 0x19);
+ { uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+ { uint32_t x73 = (x71 + x39); /* second carry step: limb 0 overflow into limb 1 */
+ { uint32_t x74 = (x73 >> 0x18);
+ { uint32_t x75 = (x73 & 0xffffff);
+ out[0] = x72; /* results are stored lowest limb first */
+ out[1] = x75;
+ out[2] = (x74 + x42); /* the carry from limb 1 is added without a further mask */
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ out[7] = x57;
+ out[8] = x60;
+ out[9] = x63;
+ out[10] = x66;
+ out[11] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e291m19/freeze.c b/src/Specific/solinas32_2e291m19/freeze.c
index bb62d2336..a2c3a1c70 100644
--- a/src/Specific/solinas32_2e291m19/freeze.c
+++ b/src/Specific/solinas32_2e291m19/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 25 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffed;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) { /* Subtracts a fixed 12-limb constant from in1 with a borrow chain, then adds the constant back under a mask chosen by the final borrow, and stores the 12 results in out. NOTE(review): the `Op (Syntax....)` lines below are not valid C (unlowered operator syntax, two declarations joined by a comma); they read like pretty-printer placeholders for subtract-with-borrow / add-with-carry -- confirm how this file is meant to be consumed. */
+ { const uint32_t x21 = in1[11]; /* limbs are loaded highest index first: x21 = in1[11] ... x2 = in1[0] */
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffed); /* borrow chain, limb 0 first; arguments appear to be (borrow-in, limb, constant). The constants 0x1ffffed, 0xffffff, 0x1ffffff are presumably the limbs of the modulus -- TODO confirm */
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0xffffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0xffffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0xffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0x1ffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0xffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0xffffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0xffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0x1ffffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0xffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0xffffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0xffffff); /* x58 is the borrow out of the top limb */
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff); /* word mask selected by the final borrow; cmovznz is defined elsewhere -- presumably yields 0xffffffff when x58 is nonzero and 0x0 otherwise, confirm */
+ { uint32_t x60 = (x59 & 0x1ffffed); /* each limb's constant is ANDed with the mask, then added back with a carry chain */
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60);
+ { uint32_t x64 = (x59 & 0xffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0xffffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0xffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0x1ffffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0xffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0xffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0xffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0x1ffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0xffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0xffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0xffffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104); /* the last carry-out is bound to _ and not used */
+ out[0] = x62; /* outputs are the add-back results, lowest limb first */
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e321m9/femul.c b/src/Specific/solinas32_2e321m9/femul.c
index 17f2e6012..a73325071 100644
--- a/src/Specific/solinas32_2e321m9/femul.c
+++ b/src/Specific/solinas32_2e321m9/femul.c
@@ -1,106 +1,120 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
-{ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x9 * ((uint64_t)x32 * x62)));
-{ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x9 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
-{ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x9 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
-{ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x9 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
-{ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x9 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
-{ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x9 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
-{ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x9 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
-{ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x9 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
-{ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x9 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
-{ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x9 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
-{ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x9 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
-{ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x9 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
-{ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x9 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
-{ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x9 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
-{ uint64_t x79 = (((uint64_t)x5 * x35) + (0x9 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
-{ uint32_t x80 = (uint32_t) (x79 >> 0x15);
-{ uint32_t x81 = ((uint32_t)x79 & 0x1fffff);
-{ uint64_t x82 = (x80 + x78);
-{ uint32_t x83 = (uint32_t) (x82 >> 0x14);
-{ uint32_t x84 = ((uint32_t)x82 & 0xfffff);
-{ uint64_t x85 = (x83 + x77);
-{ uint32_t x86 = (uint32_t) (x85 >> 0x14);
-{ uint32_t x87 = ((uint32_t)x85 & 0xfffff);
-{ uint64_t x88 = (x86 + x76);
-{ uint32_t x89 = (uint32_t) (x88 >> 0x14);
-{ uint32_t x90 = ((uint32_t)x88 & 0xfffff);
-{ uint64_t x91 = (x89 + x75);
-{ uint32_t x92 = (uint32_t) (x91 >> 0x14);
-{ uint32_t x93 = ((uint32_t)x91 & 0xfffff);
-{ uint64_t x94 = (x92 + x74);
-{ uint32_t x95 = (uint32_t) (x94 >> 0x14);
-{ uint32_t x96 = ((uint32_t)x94 & 0xfffff);
-{ uint64_t x97 = (x95 + x73);
-{ uint32_t x98 = (uint32_t) (x97 >> 0x14);
-{ uint32_t x99 = ((uint32_t)x97 & 0xfffff);
-{ uint64_t x100 = (x98 + x72);
-{ uint32_t x101 = (uint32_t) (x100 >> 0x14);
-{ uint32_t x102 = ((uint32_t)x100 & 0xfffff);
-{ uint64_t x103 = (x101 + x71);
-{ uint32_t x104 = (uint32_t) (x103 >> 0x14);
-{ uint32_t x105 = ((uint32_t)x103 & 0xfffff);
-{ uint64_t x106 = (x104 + x70);
-{ uint32_t x107 = (uint32_t) (x106 >> 0x14);
-{ uint32_t x108 = ((uint32_t)x106 & 0xfffff);
-{ uint64_t x109 = (x107 + x69);
-{ uint32_t x110 = (uint32_t) (x109 >> 0x14);
-{ uint32_t x111 = ((uint32_t)x109 & 0xfffff);
-{ uint64_t x112 = (x110 + x68);
-{ uint32_t x113 = (uint32_t) (x112 >> 0x14);
-{ uint32_t x114 = ((uint32_t)x112 & 0xfffff);
-{ uint64_t x115 = (x113 + x67);
-{ uint32_t x116 = (uint32_t) (x115 >> 0x14);
-{ uint32_t x117 = ((uint32_t)x115 & 0xfffff);
-{ uint64_t x118 = (x116 + x66);
-{ uint32_t x119 = (uint32_t) (x118 >> 0x14);
-{ uint32_t x120 = ((uint32_t)x118 & 0xfffff);
-{ uint64_t x121 = (x119 + x65);
-{ uint32_t x122 = (uint32_t) (x121 >> 0x14);
-{ uint32_t x123 = ((uint32_t)x121 & 0xfffff);
-{ uint64_t x124 = (x122 + x64);
-{ uint32_t x125 = (uint32_t) (x124 >> 0x14);
-{ uint32_t x126 = ((uint32_t)x124 & 0xfffff);
-{ uint32_t x127 = (x81 + (0x9 * x125));
-{ uint32_t x128 = (x127 >> 0x15);
-{ uint32_t x129 = (x127 & 0x1fffff);
-{ uint32_t x130 = (x128 + x84);
-{ uint32_t x131 = (x130 >> 0x14);
-{ uint32_t x132 = (x130 & 0xfffff);
-out[0] = x126;
-out[1] = x123;
-out[2] = x120;
-out[3] = x117;
-out[4] = x114;
-out[5] = x111;
-out[6] = x108;
-out[7] = x105;
-out[8] = x102;
-out[9] = x99;
-out[10] = x96;
-out[11] = x93;
-out[12] = x90;
-out[13] = x131 + x87;
-out[14] = x132;
-out[15] = x129;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ { uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x9 * ((uint64_t)x32 * x62)));
+ { uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x9 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ { uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x9 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+ { uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x9 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+ { uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x9 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ { uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x9 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ { uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x9 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ { uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x9 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ { uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x9 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ { uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x9 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ { uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x9 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+ { uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x9 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+ { uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x9 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ { uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x9 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ { uint64_t x79 = (((uint64_t)x5 * x35) + (0x9 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ { uint32_t x80 = (uint32_t) (x79 >> 0x15);
+ { uint32_t x81 = ((uint32_t)x79 & 0x1fffff);
+ { uint64_t x82 = (x80 + x78);
+ { uint32_t x83 = (uint32_t) (x82 >> 0x14);
+ { uint32_t x84 = ((uint32_t)x82 & 0xfffff);
+ { uint64_t x85 = (x83 + x77);
+ { uint32_t x86 = (uint32_t) (x85 >> 0x14);
+ { uint32_t x87 = ((uint32_t)x85 & 0xfffff);
+ { uint64_t x88 = (x86 + x76);
+ { uint32_t x89 = (uint32_t) (x88 >> 0x14);
+ { uint32_t x90 = ((uint32_t)x88 & 0xfffff);
+ { uint64_t x91 = (x89 + x75);
+ { uint32_t x92 = (uint32_t) (x91 >> 0x14);
+ { uint32_t x93 = ((uint32_t)x91 & 0xfffff);
+ { uint64_t x94 = (x92 + x74);
+ { uint32_t x95 = (uint32_t) (x94 >> 0x14);
+ { uint32_t x96 = ((uint32_t)x94 & 0xfffff);
+ { uint64_t x97 = (x95 + x73);
+ { uint32_t x98 = (uint32_t) (x97 >> 0x14);
+ { uint32_t x99 = ((uint32_t)x97 & 0xfffff);
+ { uint64_t x100 = (x98 + x72);
+ { uint32_t x101 = (uint32_t) (x100 >> 0x14);
+ { uint32_t x102 = ((uint32_t)x100 & 0xfffff);
+ { uint64_t x103 = (x101 + x71);
+ { uint32_t x104 = (uint32_t) (x103 >> 0x14);
+ { uint32_t x105 = ((uint32_t)x103 & 0xfffff);
+ { uint64_t x106 = (x104 + x70);
+ { uint32_t x107 = (uint32_t) (x106 >> 0x14);
+ { uint32_t x108 = ((uint32_t)x106 & 0xfffff);
+ { uint64_t x109 = (x107 + x69);
+ { uint32_t x110 = (uint32_t) (x109 >> 0x14);
+ { uint32_t x111 = ((uint32_t)x109 & 0xfffff);
+ { uint64_t x112 = (x110 + x68);
+ { uint32_t x113 = (uint32_t) (x112 >> 0x14);
+ { uint32_t x114 = ((uint32_t)x112 & 0xfffff);
+ { uint64_t x115 = (x113 + x67);
+ { uint32_t x116 = (uint32_t) (x115 >> 0x14);
+ { uint32_t x117 = ((uint32_t)x115 & 0xfffff);
+ { uint64_t x118 = (x116 + x66);
+ { uint32_t x119 = (uint32_t) (x118 >> 0x14);
+ { uint32_t x120 = ((uint32_t)x118 & 0xfffff);
+ { uint64_t x121 = (x119 + x65);
+ { uint32_t x122 = (uint32_t) (x121 >> 0x14);
+ { uint32_t x123 = ((uint32_t)x121 & 0xfffff);
+ { uint64_t x124 = (x122 + x64);
+ { uint32_t x125 = (uint32_t) (x124 >> 0x14);
+ { uint32_t x126 = ((uint32_t)x124 & 0xfffff);
+ { uint32_t x127 = (x81 + (0x9 * x125));
+ { uint32_t x128 = (x127 >> 0x15);
+ { uint32_t x129 = (x127 & 0x1fffff);
+ { uint32_t x130 = (x128 + x84);
+ { uint32_t x131 = (x130 >> 0x14);
+ { uint32_t x132 = (x130 & 0xfffff);
+ out[0] = x129;
+ out[1] = x132;
+ out[2] = (x131 + x87);
+ out[3] = x90;
+ out[4] = x93;
+ out[5] = x96;
+ out[6] = x99;
+ out[7] = x102;
+ out[8] = x105;
+ out[9] = x108;
+ out[10] = x111;
+ out[11] = x114;
+ out[12] = x117;
+ out[13] = x120;
+ out[14] = x123;
+ out[15] = x126;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e321m9/fesquare.c b/src/Specific/solinas32_2e321m9/fesquare.c
index 9cfed4a93..fbca43143 100644
--- a/src/Specific/solinas32_2e321m9/fesquare.c
+++ b/src/Specific/solinas32_2e321m9/fesquare.c
@@ -1,106 +1,104 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x9 * ((uint64_t)x29 * x29)));
-{ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x9 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
-{ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x9 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
-{ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x9 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x9 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
-{ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x9 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
-{ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x9 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
-{ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
-{ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
-{ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
-{ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
-{ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
-{ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
-{ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
-{ uint64_t x46 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
-{ uint32_t x47 = (uint32_t) (x46 >> 0x15);
-{ uint32_t x48 = ((uint32_t)x46 & 0x1fffff);
-{ uint64_t x49 = (x47 + x45);
-{ uint32_t x50 = (uint32_t) (x49 >> 0x14);
-{ uint32_t x51 = ((uint32_t)x49 & 0xfffff);
-{ uint64_t x52 = (x50 + x44);
-{ uint32_t x53 = (uint32_t) (x52 >> 0x14);
-{ uint32_t x54 = ((uint32_t)x52 & 0xfffff);
-{ uint64_t x55 = (x53 + x43);
-{ uint32_t x56 = (uint32_t) (x55 >> 0x14);
-{ uint32_t x57 = ((uint32_t)x55 & 0xfffff);
-{ uint64_t x58 = (x56 + x42);
-{ uint32_t x59 = (uint32_t) (x58 >> 0x14);
-{ uint32_t x60 = ((uint32_t)x58 & 0xfffff);
-{ uint64_t x61 = (x59 + x41);
-{ uint32_t x62 = (uint32_t) (x61 >> 0x14);
-{ uint32_t x63 = ((uint32_t)x61 & 0xfffff);
-{ uint64_t x64 = (x62 + x40);
-{ uint32_t x65 = (uint32_t) (x64 >> 0x14);
-{ uint32_t x66 = ((uint32_t)x64 & 0xfffff);
-{ uint64_t x67 = (x65 + x39);
-{ uint32_t x68 = (uint32_t) (x67 >> 0x14);
-{ uint32_t x69 = ((uint32_t)x67 & 0xfffff);
-{ uint64_t x70 = (x68 + x38);
-{ uint32_t x71 = (uint32_t) (x70 >> 0x14);
-{ uint32_t x72 = ((uint32_t)x70 & 0xfffff);
-{ uint64_t x73 = (x71 + x37);
-{ uint32_t x74 = (uint32_t) (x73 >> 0x14);
-{ uint32_t x75 = ((uint32_t)x73 & 0xfffff);
-{ uint64_t x76 = (x74 + x36);
-{ uint32_t x77 = (uint32_t) (x76 >> 0x14);
-{ uint32_t x78 = ((uint32_t)x76 & 0xfffff);
-{ uint64_t x79 = (x77 + x35);
-{ uint32_t x80 = (uint32_t) (x79 >> 0x14);
-{ uint32_t x81 = ((uint32_t)x79 & 0xfffff);
-{ uint64_t x82 = (x80 + x34);
-{ uint32_t x83 = (uint32_t) (x82 >> 0x14);
-{ uint32_t x84 = ((uint32_t)x82 & 0xfffff);
-{ uint64_t x85 = (x83 + x33);
-{ uint32_t x86 = (uint32_t) (x85 >> 0x14);
-{ uint32_t x87 = ((uint32_t)x85 & 0xfffff);
-{ uint64_t x88 = (x86 + x32);
-{ uint32_t x89 = (uint32_t) (x88 >> 0x14);
-{ uint32_t x90 = ((uint32_t)x88 & 0xfffff);
-{ uint64_t x91 = (x89 + x31);
-{ uint32_t x92 = (uint32_t) (x91 >> 0x14);
-{ uint32_t x93 = ((uint32_t)x91 & 0xfffff);
-{ uint32_t x94 = (x48 + (0x9 * x92));
-{ uint32_t x95 = (x94 >> 0x15);
-{ uint32_t x96 = (x94 & 0x1fffff);
-{ uint32_t x97 = (x95 + x51);
-{ uint32_t x98 = (x97 >> 0x14);
-{ uint32_t x99 = (x97 & 0xfffff);
-out[0] = x93;
-out[1] = x90;
-out[2] = x87;
-out[3] = x84;
-out[4] = x81;
-out[5] = x78;
-out[6] = x75;
-out[7] = x72;
-out[8] = x69;
-out[9] = x66;
-out[10] = x63;
-out[11] = x60;
-out[12] = x57;
-out[13] = x98 + x54;
-out[14] = x99;
-out[15] = x96;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x9 * ((uint64_t)x29 * x29)));
+ { uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x9 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ { uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x9 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+ { uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x9 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x9 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ { uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x9 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ { uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x9 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ { uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ { uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ { uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ { uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+ { uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+ { uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ { uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ { uint64_t x46 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+ { uint32_t x47 = (uint32_t) (x46 >> 0x15);
+ { uint32_t x48 = ((uint32_t)x46 & 0x1fffff);
+ { uint64_t x49 = (x47 + x45);
+ { uint32_t x50 = (uint32_t) (x49 >> 0x14);
+ { uint32_t x51 = ((uint32_t)x49 & 0xfffff);
+ { uint64_t x52 = (x50 + x44);
+ { uint32_t x53 = (uint32_t) (x52 >> 0x14);
+ { uint32_t x54 = ((uint32_t)x52 & 0xfffff);
+ { uint64_t x55 = (x53 + x43);
+ { uint32_t x56 = (uint32_t) (x55 >> 0x14);
+ { uint32_t x57 = ((uint32_t)x55 & 0xfffff);
+ { uint64_t x58 = (x56 + x42);
+ { uint32_t x59 = (uint32_t) (x58 >> 0x14);
+ { uint32_t x60 = ((uint32_t)x58 & 0xfffff);
+ { uint64_t x61 = (x59 + x41);
+ { uint32_t x62 = (uint32_t) (x61 >> 0x14);
+ { uint32_t x63 = ((uint32_t)x61 & 0xfffff);
+ { uint64_t x64 = (x62 + x40);
+ { uint32_t x65 = (uint32_t) (x64 >> 0x14);
+ { uint32_t x66 = ((uint32_t)x64 & 0xfffff);
+ { uint64_t x67 = (x65 + x39);
+ { uint32_t x68 = (uint32_t) (x67 >> 0x14);
+ { uint32_t x69 = ((uint32_t)x67 & 0xfffff);
+ { uint64_t x70 = (x68 + x38);
+ { uint32_t x71 = (uint32_t) (x70 >> 0x14);
+ { uint32_t x72 = ((uint32_t)x70 & 0xfffff);
+ { uint64_t x73 = (x71 + x37);
+ { uint32_t x74 = (uint32_t) (x73 >> 0x14);
+ { uint32_t x75 = ((uint32_t)x73 & 0xfffff);
+ { uint64_t x76 = (x74 + x36);
+ { uint32_t x77 = (uint32_t) (x76 >> 0x14);
+ { uint32_t x78 = ((uint32_t)x76 & 0xfffff);
+ { uint64_t x79 = (x77 + x35);
+ { uint32_t x80 = (uint32_t) (x79 >> 0x14);
+ { uint32_t x81 = ((uint32_t)x79 & 0xfffff);
+ { uint64_t x82 = (x80 + x34);
+ { uint32_t x83 = (uint32_t) (x82 >> 0x14);
+ { uint32_t x84 = ((uint32_t)x82 & 0xfffff);
+ { uint64_t x85 = (x83 + x33);
+ { uint32_t x86 = (uint32_t) (x85 >> 0x14);
+ { uint32_t x87 = ((uint32_t)x85 & 0xfffff);
+ { uint64_t x88 = (x86 + x32);
+ { uint32_t x89 = (uint32_t) (x88 >> 0x14);
+ { uint32_t x90 = ((uint32_t)x88 & 0xfffff);
+ { uint64_t x91 = (x89 + x31);
+ { uint32_t x92 = (uint32_t) (x91 >> 0x14);
+ { uint32_t x93 = ((uint32_t)x91 & 0xfffff);
+ { uint32_t x94 = (x48 + (0x9 * x92));
+ { uint32_t x95 = (x94 >> 0x15);
+ { uint32_t x96 = (x94 & 0x1fffff);
+ { uint32_t x97 = (x95 + x51);
+ { uint32_t x98 = (x97 >> 0x14);
+ { uint32_t x99 = (x97 & 0xfffff);
+ out[0] = x96;
+ out[1] = x99;
+ out[2] = (x98 + x54);
+ out[3] = x57;
+ out[4] = x60;
+ out[5] = x63;
+ out[6] = x66;
+ out[7] = x69;
+ out[8] = x72;
+ out[9] = x75;
+ out[10] = x78;
+ out[11] = x81;
+ out[12] = x84;
+ out[13] = x87;
+ out[14] = x90;
+ out[15] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e321m9/freeze.c b/src/Specific/solinas32_2e321m9/freeze.c
index d44bc9b83..02edaa972 100644
--- a/src/Specific/solinas32_2e321m9/freeze.c
+++ b/src/Specific/solinas32_2e321m9/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 21 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffff7;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffff7);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xfffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xfffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xfffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xfffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xfffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xfffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xfffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xfffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xfffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xfffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xfffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xfffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xfffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xfffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xfffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0x1ffff7);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 21 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0xfffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0xfffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0xfffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0xfffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0xfffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0xfffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0xfffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0xfffff);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0xfffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0xfffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0xfffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0xfffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0xfffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0xfffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0xfffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 20 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e322m2e161m1/femul.c b/src/Specific/solinas32_2e322m2e161m1/femul.c
index 7cfe5c54c..6111bb862 100644
--- a/src/Specific/solinas32_2e322m2e161m1/femul.c
+++ b/src/Specific/solinas32_2e322m2e161m1/femul.c
@@ -1,119 +1,129 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31)
-{ uint64_t x56 = (((uint64_t)(x17 + x28) * (x43 + x54)) - ((uint64_t)x17 * x43));
-{ uint64_t x57 = ((((uint64_t)(x15 + x29) * (x43 + x54)) + ((uint64_t)(x17 + x28) * (x41 + x55))) - (((uint64_t)x15 * x43) + ((uint64_t)x17 * x41)));
-{ uint64_t x58 = ((((uint64_t)(x13 + x27) * (x43 + x54)) + (((uint64_t)(x15 + x29) * (x41 + x55)) + ((uint64_t)(x17 + x28) * (x39 + x53)))) - (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + ((uint64_t)x17 * x39))));
-{ uint64_t x59 = ((((uint64_t)(x11 + x25) * (x43 + x54)) + (((uint64_t)(x13 + x27) * (x41 + x55)) + (((uint64_t)(x15 + x29) * (x39 + x53)) + ((uint64_t)(x17 + x28) * (x37 + x51))))) - (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + ((uint64_t)x17 * x37)))));
-{ uint64_t x60 = ((((uint64_t)(x9 + x23) * (x43 + x54)) + (((uint64_t)(x11 + x25) * (x41 + x55)) + (((uint64_t)(x13 + x27) * (x39 + x53)) + (((uint64_t)(x15 + x29) * (x37 + x51)) + ((uint64_t)(x17 + x28) * (x35 + x49)))))) - (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))));
-{ uint64_t x61 = ((((uint64_t)(x7 + x21) * (x43 + x54)) + (((uint64_t)(x9 + x23) * (x41 + x55)) + (((uint64_t)(x11 + x25) * (x39 + x53)) + (((uint64_t)(x13 + x27) * (x37 + x51)) + (((uint64_t)(x15 + x29) * (x35 + x49)) + ((uint64_t)(x17 + x28) * (x33 + x47))))))) - (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33)))))));
-{ uint64_t x62 = ((((uint64_t)(x5 + x19) * (x43 + x54)) + (((uint64_t)(x7 + x21) * (x41 + x55)) + (((uint64_t)(x9 + x23) * (x39 + x53)) + (((uint64_t)(x11 + x25) * (x37 + x51)) + (((uint64_t)(x13 + x27) * (x35 + x49)) + (((uint64_t)(x15 + x29) * (x33 + x47)) + ((uint64_t)(x17 + x28) * (x31 + x45)))))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))));
-{ uint64_t x63 = ((((uint64_t)(x5 + x19) * (x41 + x55)) + (((uint64_t)(x7 + x21) * (x39 + x53)) + (((uint64_t)(x9 + x23) * (x37 + x51)) + (((uint64_t)(x11 + x25) * (x35 + x49)) + (((uint64_t)(x13 + x27) * (x33 + x47)) + ((uint64_t)(x15 + x29) * (x31 + x45))))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))));
-{ uint64_t x64 = ((((uint64_t)(x5 + x19) * (x39 + x53)) + (((uint64_t)(x7 + x21) * (x37 + x51)) + (((uint64_t)(x9 + x23) * (x35 + x49)) + (((uint64_t)(x11 + x25) * (x33 + x47)) + ((uint64_t)(x13 + x27) * (x31 + x45)))))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))));
-{ uint64_t x65 = ((((uint64_t)(x5 + x19) * (x37 + x51)) + (((uint64_t)(x7 + x21) * (x35 + x49)) + (((uint64_t)(x9 + x23) * (x33 + x47)) + ((uint64_t)(x11 + x25) * (x31 + x45))))) - (((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))));
-{ uint64_t x66 = ((((uint64_t)(x5 + x19) * (x35 + x49)) + (((uint64_t)(x7 + x21) * (x33 + x47)) + ((uint64_t)(x9 + x23) * (x31 + x45)))) - (((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))));
-{ uint64_t x67 = ((((uint64_t)(x5 + x19) * (x33 + x47)) + ((uint64_t)(x7 + x21) * (x31 + x45))) - (((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)));
-{ uint64_t x68 = (((uint64_t)(x5 + x19) * (x31 + x45)) - ((uint64_t)x5 * x31));
-{ uint64_t x69 = (((((uint64_t)x17 * x43) + ((uint64_t)x28 * x54)) + x63) + x56);
-{ uint64_t x70 = ((((((uint64_t)x15 * x43) + ((uint64_t)x17 * x41)) + (((uint64_t)x29 * x54) + ((uint64_t)x28 * x55))) + x64) + x57);
-{ uint64_t x71 = ((((((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + ((uint64_t)x17 * x39))) + (((uint64_t)x27 * x54) + (((uint64_t)x29 * x55) + ((uint64_t)x28 * x53)))) + x65) + x58);
-{ uint64_t x72 = ((((((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + ((uint64_t)x17 * x37)))) + (((uint64_t)x25 * x54) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x28 * x51))))) + x66) + x59);
-{ uint64_t x73 = ((((((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))) + (((uint64_t)x23 * x54) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + ((uint64_t)x28 * x49)))))) + x67) + x60);
-{ uint64_t x74 = ((((((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33)))))) + (((uint64_t)x21 * x54) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((uint64_t)x28 * x47))))))) + x68) + x61);
-{ uint64_t x75 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))) + (((uint64_t)x19 * x54) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((uint64_t)x28 * x45))))))));
-{ uint64_t x76 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + ((uint64_t)x29 * x45))))))) + x56);
-{ uint64_t x77 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + ((uint64_t)x27 * x45)))))) + x57);
-{ uint64_t x78 = (((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + ((uint64_t)x25 * x45))))) + x58);
-{ uint64_t x79 = (((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((uint64_t)x23 * x45)))) + x59);
-{ uint64_t x80 = (((((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)) + (((uint64_t)x19 * x47) + ((uint64_t)x21 * x45))) + x60);
-{ uint64_t x81 = ((((uint64_t)x5 * x31) + ((uint64_t)x19 * x45)) + x61);
-{ uint32_t x82 = (uint32_t) (x75 >> 0x17);
-{ uint32_t x83 = ((uint32_t)x75 & 0x7fffff);
-{ uint32_t x84 = (uint32_t) (x62 >> 0x17);
-{ uint32_t x85 = ((uint32_t)x62 & 0x7fffff);
-{ uint64_t x86 = (((uint64_t)0x800000 * x84) + x85);
-{ uint32_t x87 = (uint32_t) (x86 >> 0x17);
-{ uint32_t x88 = ((uint32_t)x86 & 0x7fffff);
-{ uint64_t x89 = ((x82 + x74) + x87);
-{ uint32_t x90 = (uint32_t) (x89 >> 0x17);
-{ uint32_t x91 = ((uint32_t)x89 & 0x7fffff);
-{ uint64_t x92 = (x81 + x87);
-{ uint32_t x93 = (uint32_t) (x92 >> 0x17);
-{ uint32_t x94 = ((uint32_t)x92 & 0x7fffff);
-{ uint64_t x95 = (x90 + x73);
-{ uint32_t x96 = (uint32_t) (x95 >> 0x17);
-{ uint32_t x97 = ((uint32_t)x95 & 0x7fffff);
-{ uint64_t x98 = (x93 + x80);
-{ uint32_t x99 = (uint32_t) (x98 >> 0x17);
-{ uint32_t x100 = ((uint32_t)x98 & 0x7fffff);
-{ uint64_t x101 = (x96 + x72);
-{ uint32_t x102 = (uint32_t) (x101 >> 0x17);
-{ uint32_t x103 = ((uint32_t)x101 & 0x7fffff);
-{ uint64_t x104 = (x99 + x79);
-{ uint32_t x105 = (uint32_t) (x104 >> 0x17);
-{ uint32_t x106 = ((uint32_t)x104 & 0x7fffff);
-{ uint64_t x107 = (x102 + x71);
-{ uint32_t x108 = (uint32_t) (x107 >> 0x17);
-{ uint32_t x109 = ((uint32_t)x107 & 0x7fffff);
-{ uint64_t x110 = (x105 + x78);
-{ uint32_t x111 = (uint32_t) (x110 >> 0x17);
-{ uint32_t x112 = ((uint32_t)x110 & 0x7fffff);
-{ uint64_t x113 = (x108 + x70);
-{ uint32_t x114 = (uint32_t) (x113 >> 0x17);
-{ uint32_t x115 = ((uint32_t)x113 & 0x7fffff);
-{ uint64_t x116 = (x111 + x77);
-{ uint32_t x117 = (uint32_t) (x116 >> 0x17);
-{ uint32_t x118 = ((uint32_t)x116 & 0x7fffff);
-{ uint64_t x119 = (x114 + x69);
-{ uint32_t x120 = (uint32_t) (x119 >> 0x17);
-{ uint32_t x121 = ((uint32_t)x119 & 0x7fffff);
-{ uint64_t x122 = (x117 + x76);
-{ uint32_t x123 = (uint32_t) (x122 >> 0x17);
-{ uint32_t x124 = ((uint32_t)x122 & 0x7fffff);
-{ uint32_t x125 = (x120 + x88);
-{ uint32_t x126 = (x125 >> 0x17);
-{ uint32_t x127 = (x125 & 0x7fffff);
-{ uint32_t x128 = (x123 + x83);
-{ uint32_t x129 = (x128 >> 0x17);
-{ uint32_t x130 = (x128 & 0x7fffff);
-{ uint32_t x131 = ((0x800000 * x126) + x127);
-{ uint32_t x132 = (x131 >> 0x17);
-{ uint32_t x133 = (x131 & 0x7fffff);
-{ uint32_t x134 = ((x129 + x91) + x132);
-{ uint32_t x135 = (x134 >> 0x17);
-{ uint32_t x136 = (x134 & 0x7fffff);
-{ uint32_t x137 = (x94 + x132);
-{ uint32_t x138 = (x137 >> 0x17);
-{ uint32_t x139 = (x137 & 0x7fffff);
-out[0] = x133;
-out[1] = x121;
-out[2] = x115;
-out[3] = x109;
-out[4] = x103;
-out[5] = x135 + x97;
-out[6] = x136;
-out[7] = x130;
-out[8] = x124;
-out[9] = x118;
-out[10] = x112;
-out[11] = x106;
-out[12] = x138 + x100;
-out[13] = x139;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[14];
+static void femul(uint32_t out[14], const uint32_t in1[14], const uint32_t in2[14]) { // Combines products of the 14 limbs of in1 and in2 into 14 result limbs written to out. NOTE(review): presumably field multiplication modulo 2^322 - 2^161 - 1 with 23-bit limbs (per the directory name and the 0x17 / 0x7fffff constants below) -- confirm against the generator.
+ { const uint32_t x28 = in1[13]; // x28..x5: local copies of in1[13] down to in1[0]
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x54 = in2[13]; // x54..x31: local copies of in2[13] down to in2[0]
+ { const uint32_t x55 = in2[12];
+ { const uint32_t x53 = in2[11];
+ { const uint32_t x51 = in2[10];
+ { const uint32_t x49 = in2[9];
+ { const uint32_t x47 = in2[8];
+ { const uint32_t x45 = in2[7];
+ { const uint32_t x43 = in2[6];
+ { const uint32_t x41 = in2[5];
+ { const uint32_t x39 = in2[4];
+ { const uint32_t x37 = in2[3];
+ { const uint32_t x35 = in2[2];
+ { const uint32_t x33 = in2[1];
+ { const uint32_t x31 = in2[0];
+ { uint64_t x56 = (((uint64_t)(x17 + x28) * (x43 + x54)) - ((uint64_t)x17 * x43)); // x56..x68: for k = 12 down to 0, the sum over i+j=k (i,j in 0..6) of (in1[i]+in1[i+7])*(in2[j]+in2[j+7]) minus the sum of in1[i]*in2[j]. The limb sums are formed in uint32_t before widening -- assumes input limbs are small enough not to wrap; TODO confirm the bound.
+ { uint64_t x57 = ((((uint64_t)(x15 + x29) * (x43 + x54)) + ((uint64_t)(x17 + x28) * (x41 + x55))) - (((uint64_t)x15 * x43) + ((uint64_t)x17 * x41)));
+ { uint64_t x58 = ((((uint64_t)(x13 + x27) * (x43 + x54)) + (((uint64_t)(x15 + x29) * (x41 + x55)) + ((uint64_t)(x17 + x28) * (x39 + x53)))) - (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + ((uint64_t)x17 * x39))));
+ { uint64_t x59 = ((((uint64_t)(x11 + x25) * (x43 + x54)) + (((uint64_t)(x13 + x27) * (x41 + x55)) + (((uint64_t)(x15 + x29) * (x39 + x53)) + ((uint64_t)(x17 + x28) * (x37 + x51))))) - (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + ((uint64_t)x17 * x37)))));
+ { uint64_t x60 = ((((uint64_t)(x9 + x23) * (x43 + x54)) + (((uint64_t)(x11 + x25) * (x41 + x55)) + (((uint64_t)(x13 + x27) * (x39 + x53)) + (((uint64_t)(x15 + x29) * (x37 + x51)) + ((uint64_t)(x17 + x28) * (x35 + x49)))))) - (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))));
+ { uint64_t x61 = ((((uint64_t)(x7 + x21) * (x43 + x54)) + (((uint64_t)(x9 + x23) * (x41 + x55)) + (((uint64_t)(x11 + x25) * (x39 + x53)) + (((uint64_t)(x13 + x27) * (x37 + x51)) + (((uint64_t)(x15 + x29) * (x35 + x49)) + ((uint64_t)(x17 + x28) * (x33 + x47))))))) - (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33)))))));
+ { uint64_t x62 = ((((uint64_t)(x5 + x19) * (x43 + x54)) + (((uint64_t)(x7 + x21) * (x41 + x55)) + (((uint64_t)(x9 + x23) * (x39 + x53)) + (((uint64_t)(x11 + x25) * (x37 + x51)) + (((uint64_t)(x13 + x27) * (x35 + x49)) + (((uint64_t)(x15 + x29) * (x33 + x47)) + ((uint64_t)(x17 + x28) * (x31 + x45)))))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))));
+ { uint64_t x63 = ((((uint64_t)(x5 + x19) * (x41 + x55)) + (((uint64_t)(x7 + x21) * (x39 + x53)) + (((uint64_t)(x9 + x23) * (x37 + x51)) + (((uint64_t)(x11 + x25) * (x35 + x49)) + (((uint64_t)(x13 + x27) * (x33 + x47)) + ((uint64_t)(x15 + x29) * (x31 + x45))))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))));
+ { uint64_t x64 = ((((uint64_t)(x5 + x19) * (x39 + x53)) + (((uint64_t)(x7 + x21) * (x37 + x51)) + (((uint64_t)(x9 + x23) * (x35 + x49)) + (((uint64_t)(x11 + x25) * (x33 + x47)) + ((uint64_t)(x13 + x27) * (x31 + x45)))))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))));
+ { uint64_t x65 = ((((uint64_t)(x5 + x19) * (x37 + x51)) + (((uint64_t)(x7 + x21) * (x35 + x49)) + (((uint64_t)(x9 + x23) * (x33 + x47)) + ((uint64_t)(x11 + x25) * (x31 + x45))))) - (((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))));
+ { uint64_t x66 = ((((uint64_t)(x5 + x19) * (x35 + x49)) + (((uint64_t)(x7 + x21) * (x33 + x47)) + ((uint64_t)(x9 + x23) * (x31 + x45)))) - (((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))));
+ { uint64_t x67 = ((((uint64_t)(x5 + x19) * (x33 + x47)) + ((uint64_t)(x7 + x21) * (x31 + x45))) - (((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)));
+ { uint64_t x68 = (((uint64_t)(x5 + x19) * (x31 + x45)) - ((uint64_t)x5 * x31));
+ { uint64_t x69 = (((((uint64_t)x17 * x43) + ((uint64_t)x28 * x54)) + x63) + x56); // x69..x81: per degree, add the low-half products in1[i]*in2[j] and the high-half products in1[i+7]*in2[j+7], then fold in the cross terms above (x69..x74 take two each, x75 none, x76..x81 one each; x62 is consumed directly by the carry step below)
+ { uint64_t x70 = ((((((uint64_t)x15 * x43) + ((uint64_t)x17 * x41)) + (((uint64_t)x29 * x54) + ((uint64_t)x28 * x55))) + x64) + x57);
+ { uint64_t x71 = ((((((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + ((uint64_t)x17 * x39))) + (((uint64_t)x27 * x54) + (((uint64_t)x29 * x55) + ((uint64_t)x28 * x53)))) + x65) + x58);
+ { uint64_t x72 = ((((((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + ((uint64_t)x17 * x37)))) + (((uint64_t)x25 * x54) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x28 * x51))))) + x66) + x59);
+ { uint64_t x73 = ((((((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))) + (((uint64_t)x23 * x54) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + ((uint64_t)x28 * x49)))))) + x67) + x60);
+ { uint64_t x74 = ((((((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33)))))) + (((uint64_t)x21 * x54) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((uint64_t)x28 * x47))))))) + x68) + x61);
+ { uint64_t x75 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))) + (((uint64_t)x19 * x54) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((uint64_t)x28 * x45))))))));
+ { uint64_t x76 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + ((uint64_t)x29 * x45))))))) + x56);
+ { uint64_t x77 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + ((uint64_t)x27 * x45)))))) + x57);
+ { uint64_t x78 = (((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + ((uint64_t)x25 * x45))))) + x58);
+ { uint64_t x79 = (((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((uint64_t)x23 * x45)))) + x59);
+ { uint64_t x80 = (((((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)) + (((uint64_t)x19 * x47) + ((uint64_t)x21 * x45))) + x60);
+ { uint64_t x81 = ((((uint64_t)x5 * x31) + ((uint64_t)x19 * x45)) + x61);
+ { uint32_t x82 = (uint32_t) (x75 >> 0x17); // carry steps start here: ">> 0x17" takes the bits above the low 23 as the carry, "& 0x7fffff" keeps the low 23 bits as the limb
+ { uint32_t x83 = ((uint32_t)x75 & 0x7fffff);
+ { uint32_t x84 = (uint32_t) (x62 >> 0x17);
+ { uint32_t x85 = ((uint32_t)x62 & 0x7fffff);
+ { uint64_t x86 = (((uint64_t)0x800000 * x84) + x85);
+ { uint32_t x87 = (uint32_t) (x86 >> 0x17);
+ { uint32_t x88 = ((uint32_t)x86 & 0x7fffff);
+ { uint64_t x89 = ((x82 + x74) + x87);
+ { uint32_t x90 = (uint32_t) (x89 >> 0x17);
+ { uint32_t x91 = ((uint32_t)x89 & 0x7fffff);
+ { uint64_t x92 = (x81 + x87);
+ { uint32_t x93 = (uint32_t) (x92 >> 0x17);
+ { uint32_t x94 = ((uint32_t)x92 & 0x7fffff);
+ { uint64_t x95 = (x90 + x73);
+ { uint32_t x96 = (uint32_t) (x95 >> 0x17);
+ { uint32_t x97 = ((uint32_t)x95 & 0x7fffff);
+ { uint64_t x98 = (x93 + x80);
+ { uint32_t x99 = (uint32_t) (x98 >> 0x17);
+ { uint32_t x100 = ((uint32_t)x98 & 0x7fffff);
+ { uint64_t x101 = (x96 + x72);
+ { uint32_t x102 = (uint32_t) (x101 >> 0x17);
+ { uint32_t x103 = ((uint32_t)x101 & 0x7fffff);
+ { uint64_t x104 = (x99 + x79);
+ { uint32_t x105 = (uint32_t) (x104 >> 0x17);
+ { uint32_t x106 = ((uint32_t)x104 & 0x7fffff);
+ { uint64_t x107 = (x102 + x71);
+ { uint32_t x108 = (uint32_t) (x107 >> 0x17);
+ { uint32_t x109 = ((uint32_t)x107 & 0x7fffff);
+ { uint64_t x110 = (x105 + x78);
+ { uint32_t x111 = (uint32_t) (x110 >> 0x17);
+ { uint32_t x112 = ((uint32_t)x110 & 0x7fffff);
+ { uint64_t x113 = (x108 + x70);
+ { uint32_t x114 = (uint32_t) (x113 >> 0x17);
+ { uint32_t x115 = ((uint32_t)x113 & 0x7fffff);
+ { uint64_t x116 = (x111 + x77);
+ { uint32_t x117 = (uint32_t) (x116 >> 0x17);
+ { uint32_t x118 = ((uint32_t)x116 & 0x7fffff);
+ { uint64_t x119 = (x114 + x69);
+ { uint32_t x120 = (uint32_t) (x119 >> 0x17);
+ { uint32_t x121 = ((uint32_t)x119 & 0x7fffff);
+ { uint64_t x122 = (x117 + x76);
+ { uint32_t x123 = (uint32_t) (x122 >> 0x17);
+ { uint32_t x124 = ((uint32_t)x122 & 0x7fffff);
+ { uint32_t x125 = (x120 + x88); // from here on every intermediate is held in uint32_t
+ { uint32_t x126 = (x125 >> 0x17);
+ { uint32_t x127 = (x125 & 0x7fffff);
+ { uint32_t x128 = (x123 + x83);
+ { uint32_t x129 = (x128 >> 0x17);
+ { uint32_t x130 = (x128 & 0x7fffff);
+ { uint32_t x131 = ((0x800000 * x126) + x127);
+ { uint32_t x132 = (x131 >> 0x17);
+ { uint32_t x133 = (x131 & 0x7fffff);
+ { uint32_t x134 = ((x129 + x91) + x132);
+ { uint32_t x135 = (x134 >> 0x17);
+ { uint32_t x136 = (x134 & 0x7fffff);
+ { uint32_t x137 = (x94 + x132);
+ { uint32_t x138 = (x137 >> 0x17);
+ { uint32_t x139 = (x137 & 0x7fffff);
+ out[0] = x139; // store the 14 result limbs; out[1] and out[8] each absorb a final carry (x138, x135) and are not re-masked
+ out[1] = (x138 + x100);
+ out[2] = x106;
+ out[3] = x112;
+ out[4] = x118;
+ out[5] = x124;
+ out[6] = x130;
+ out[7] = x136;
+ out[8] = (x135 + x97);
+ out[9] = x103;
+ out[10] = x109;
+ out[11] = x115;
+ out[12] = x121;
+ out[13] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e322m2e161m1/fesquare.c b/src/Specific/solinas32_2e322m2e161m1/fesquare.c
index d745c603b..e1d4a6c5a 100644
--- a/src/Specific/solinas32_2e322m2e161m1/fesquare.c
+++ b/src/Specific/solinas32_2e322m2e161m1/fesquare.c
@@ -1,119 +1,115 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x27 = (((uint64_t)(x14 + x25) * (x14 + x25)) - ((uint64_t)x14 * x14));
-{ uint64_t x28 = ((((uint64_t)(x12 + x26) * (x14 + x25)) + ((uint64_t)(x14 + x25) * (x12 + x26))) - (((uint64_t)x12 * x14) + ((uint64_t)x14 * x12)));
-{ uint64_t x29 = ((((uint64_t)(x10 + x24) * (x14 + x25)) + (((uint64_t)(x12 + x26) * (x12 + x26)) + ((uint64_t)(x14 + x25) * (x10 + x24)))) - (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10))));
-{ uint64_t x30 = ((((uint64_t)(x8 + x22) * (x14 + x25)) + (((uint64_t)(x10 + x24) * (x12 + x26)) + (((uint64_t)(x12 + x26) * (x10 + x24)) + ((uint64_t)(x14 + x25) * (x8 + x22))))) - (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x14 * x8)))));
-{ uint64_t x31 = ((((uint64_t)(x6 + x20) * (x14 + x25)) + (((uint64_t)(x8 + x22) * (x12 + x26)) + (((uint64_t)(x10 + x24) * (x10 + x24)) + (((uint64_t)(x12 + x26) * (x8 + x22)) + ((uint64_t)(x14 + x25) * (x6 + x20)))))) - (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x14 * x6))))));
-{ uint64_t x32 = ((((uint64_t)(x4 + x18) * (x14 + x25)) + (((uint64_t)(x6 + x20) * (x12 + x26)) + (((uint64_t)(x8 + x22) * (x10 + x24)) + (((uint64_t)(x10 + x24) * (x8 + x22)) + (((uint64_t)(x12 + x26) * (x6 + x20)) + ((uint64_t)(x14 + x25) * (x4 + x18))))))) - (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x14 * x4)))))));
-{ uint64_t x33 = ((((uint64_t)(x2 + x16) * (x14 + x25)) + (((uint64_t)(x4 + x18) * (x12 + x26)) + (((uint64_t)(x6 + x20) * (x10 + x24)) + (((uint64_t)(x8 + x22) * (x8 + x22)) + (((uint64_t)(x10 + x24) * (x6 + x20)) + (((uint64_t)(x12 + x26) * (x4 + x18)) + ((uint64_t)(x14 + x25) * (x2 + x16)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
-{ uint64_t x34 = ((((uint64_t)(x2 + x16) * (x12 + x26)) + (((uint64_t)(x4 + x18) * (x10 + x24)) + (((uint64_t)(x6 + x20) * (x8 + x22)) + (((uint64_t)(x8 + x22) * (x6 + x20)) + (((uint64_t)(x10 + x24) * (x4 + x18)) + ((uint64_t)(x12 + x26) * (x2 + x16))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
-{ uint64_t x35 = ((((uint64_t)(x2 + x16) * (x10 + x24)) + (((uint64_t)(x4 + x18) * (x8 + x22)) + (((uint64_t)(x6 + x20) * (x6 + x20)) + (((uint64_t)(x8 + x22) * (x4 + x18)) + ((uint64_t)(x10 + x24) * (x2 + x16)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
-{ uint64_t x36 = ((((uint64_t)(x2 + x16) * (x8 + x22)) + (((uint64_t)(x4 + x18) * (x6 + x20)) + (((uint64_t)(x6 + x20) * (x4 + x18)) + ((uint64_t)(x8 + x22) * (x2 + x16))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
-{ uint64_t x37 = ((((uint64_t)(x2 + x16) * (x6 + x20)) + (((uint64_t)(x4 + x18) * (x4 + x18)) + ((uint64_t)(x6 + x20) * (x2 + x16)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
-{ uint64_t x38 = ((((uint64_t)(x2 + x16) * (x4 + x18)) + ((uint64_t)(x4 + x18) * (x2 + x16))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
-{ uint64_t x39 = (((uint64_t)(x2 + x16) * (x2 + x16)) - ((uint64_t)x2 * x2));
-{ uint64_t x40 = (((((uint64_t)x14 * x14) + ((uint64_t)x25 * x25)) + x34) + x27);
-{ uint64_t x41 = ((((((uint64_t)x12 * x14) + ((uint64_t)x14 * x12)) + (((uint64_t)x26 * x25) + ((uint64_t)x25 * x26))) + x35) + x28);
-{ uint64_t x42 = ((((((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10))) + (((uint64_t)x24 * x25) + (((uint64_t)x26 * x26) + ((uint64_t)x25 * x24)))) + x36) + x29);
-{ uint64_t x43 = ((((((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x14 * x8)))) + (((uint64_t)x22 * x25) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((uint64_t)x25 * x22))))) + x37) + x30);
-{ uint64_t x44 = ((((((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x14 * x6))))) + (((uint64_t)x20 * x25) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((uint64_t)x25 * x20)))))) + x38) + x31);
-{ uint64_t x45 = ((((((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x14 * x4)))))) + (((uint64_t)x18 * x25) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x25 * x18))))))) + x39) + x32);
-{ uint64_t x46 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x16 * x25) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + ((uint64_t)x25 * x16))))))));
-{ uint64_t x47 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + ((uint64_t)x26 * x16))))))) + x27);
-{ uint64_t x48 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + ((uint64_t)x24 * x16)))))) + x28);
-{ uint64_t x49 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((uint64_t)x22 * x16))))) + x29);
-{ uint64_t x50 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + ((uint64_t)x20 * x16)))) + x30);
-{ uint64_t x51 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x16 * x18) + ((uint64_t)x18 * x16))) + x31);
-{ uint64_t x52 = ((((uint64_t)x2 * x2) + ((uint64_t)x16 * x16)) + x32);
-{ uint32_t x53 = (uint32_t) (x46 >> 0x17);
-{ uint32_t x54 = ((uint32_t)x46 & 0x7fffff);
-{ uint32_t x55 = (uint32_t) (x33 >> 0x17);
-{ uint32_t x56 = ((uint32_t)x33 & 0x7fffff);
-{ uint64_t x57 = (((uint64_t)0x800000 * x55) + x56);
-{ uint32_t x58 = (uint32_t) (x57 >> 0x17);
-{ uint32_t x59 = ((uint32_t)x57 & 0x7fffff);
-{ uint64_t x60 = ((x53 + x45) + x58);
-{ uint32_t x61 = (uint32_t) (x60 >> 0x17);
-{ uint32_t x62 = ((uint32_t)x60 & 0x7fffff);
-{ uint64_t x63 = (x52 + x58);
-{ uint32_t x64 = (uint32_t) (x63 >> 0x17);
-{ uint32_t x65 = ((uint32_t)x63 & 0x7fffff);
-{ uint64_t x66 = (x61 + x44);
-{ uint32_t x67 = (uint32_t) (x66 >> 0x17);
-{ uint32_t x68 = ((uint32_t)x66 & 0x7fffff);
-{ uint64_t x69 = (x64 + x51);
-{ uint32_t x70 = (uint32_t) (x69 >> 0x17);
-{ uint32_t x71 = ((uint32_t)x69 & 0x7fffff);
-{ uint64_t x72 = (x67 + x43);
-{ uint32_t x73 = (uint32_t) (x72 >> 0x17);
-{ uint32_t x74 = ((uint32_t)x72 & 0x7fffff);
-{ uint64_t x75 = (x70 + x50);
-{ uint32_t x76 = (uint32_t) (x75 >> 0x17);
-{ uint32_t x77 = ((uint32_t)x75 & 0x7fffff);
-{ uint64_t x78 = (x73 + x42);
-{ uint32_t x79 = (uint32_t) (x78 >> 0x17);
-{ uint32_t x80 = ((uint32_t)x78 & 0x7fffff);
-{ uint64_t x81 = (x76 + x49);
-{ uint32_t x82 = (uint32_t) (x81 >> 0x17);
-{ uint32_t x83 = ((uint32_t)x81 & 0x7fffff);
-{ uint64_t x84 = (x79 + x41);
-{ uint32_t x85 = (uint32_t) (x84 >> 0x17);
-{ uint32_t x86 = ((uint32_t)x84 & 0x7fffff);
-{ uint64_t x87 = (x82 + x48);
-{ uint32_t x88 = (uint32_t) (x87 >> 0x17);
-{ uint32_t x89 = ((uint32_t)x87 & 0x7fffff);
-{ uint64_t x90 = (x85 + x40);
-{ uint32_t x91 = (uint32_t) (x90 >> 0x17);
-{ uint32_t x92 = ((uint32_t)x90 & 0x7fffff);
-{ uint64_t x93 = (x88 + x47);
-{ uint32_t x94 = (uint32_t) (x93 >> 0x17);
-{ uint32_t x95 = ((uint32_t)x93 & 0x7fffff);
-{ uint32_t x96 = (x91 + x59);
-{ uint32_t x97 = (x96 >> 0x17);
-{ uint32_t x98 = (x96 & 0x7fffff);
-{ uint32_t x99 = (x94 + x54);
-{ uint32_t x100 = (x99 >> 0x17);
-{ uint32_t x101 = (x99 & 0x7fffff);
-{ uint32_t x102 = ((0x800000 * x97) + x98);
-{ uint32_t x103 = (x102 >> 0x17);
-{ uint32_t x104 = (x102 & 0x7fffff);
-{ uint32_t x105 = ((x100 + x62) + x103);
-{ uint32_t x106 = (x105 >> 0x17);
-{ uint32_t x107 = (x105 & 0x7fffff);
-{ uint32_t x108 = (x65 + x103);
-{ uint32_t x109 = (x108 >> 0x17);
-{ uint32_t x110 = (x108 & 0x7fffff);
-out[0] = x104;
-out[1] = x92;
-out[2] = x86;
-out[3] = x80;
-out[4] = x74;
-out[5] = x106 + x68;
-out[6] = x107;
-out[7] = x101;
-out[8] = x95;
-out[9] = x89;
-out[10] = x83;
-out[11] = x77;
-out[12] = x109 + x71;
-out[13] = x110;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[14];
+static void fesquare(uint32_t out[14], const uint32_t in1[14]) { /* Squares the 14-limb value in in1 and writes 14 limbs to out. Carries are taken in 23-bit units (>> 0x17, & 0x7fffff). Every in1 limb is copied to a local before out is written. NOTE(review): presumably the solinas32_2e322m2e161m1 field, judging by the neighbouring diff paths - confirm. */
+ { const uint32_t x25 = in1[13]; /* x16,x18,x20,x22,x24,x26,x25 hold limbs 7..13 (note: x26 is limb 12, x25 is limb 13) */
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6]; /* x2,x4,...,x14 hold limbs 0..6 */
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x27 = (((uint64_t)(x14 + x25) * (x14 + x25)) - ((uint64_t)x14 * x14)); /* x27..x39: per coefficient, products of the limb-wise sums (limb i + limb i+7) minus the matching products of limbs 0..6 alone; each sum is formed in uint32_t before being widened */
+ { uint64_t x28 = ((((uint64_t)(x12 + x26) * (x14 + x25)) + ((uint64_t)(x14 + x25) * (x12 + x26))) - (((uint64_t)x12 * x14) + ((uint64_t)x14 * x12)));
+ { uint64_t x29 = ((((uint64_t)(x10 + x24) * (x14 + x25)) + (((uint64_t)(x12 + x26) * (x12 + x26)) + ((uint64_t)(x14 + x25) * (x10 + x24)))) - (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10))));
+ { uint64_t x30 = ((((uint64_t)(x8 + x22) * (x14 + x25)) + (((uint64_t)(x10 + x24) * (x12 + x26)) + (((uint64_t)(x12 + x26) * (x10 + x24)) + ((uint64_t)(x14 + x25) * (x8 + x22))))) - (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x14 * x8)))));
+ { uint64_t x31 = ((((uint64_t)(x6 + x20) * (x14 + x25)) + (((uint64_t)(x8 + x22) * (x12 + x26)) + (((uint64_t)(x10 + x24) * (x10 + x24)) + (((uint64_t)(x12 + x26) * (x8 + x22)) + ((uint64_t)(x14 + x25) * (x6 + x20)))))) - (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x14 * x6))))));
+ { uint64_t x32 = ((((uint64_t)(x4 + x18) * (x14 + x25)) + (((uint64_t)(x6 + x20) * (x12 + x26)) + (((uint64_t)(x8 + x22) * (x10 + x24)) + (((uint64_t)(x10 + x24) * (x8 + x22)) + (((uint64_t)(x12 + x26) * (x6 + x20)) + ((uint64_t)(x14 + x25) * (x4 + x18))))))) - (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x14 * x4)))))));
+ { uint64_t x33 = ((((uint64_t)(x2 + x16) * (x14 + x25)) + (((uint64_t)(x4 + x18) * (x12 + x26)) + (((uint64_t)(x6 + x20) * (x10 + x24)) + (((uint64_t)(x8 + x22) * (x8 + x22)) + (((uint64_t)(x10 + x24) * (x6 + x20)) + (((uint64_t)(x12 + x26) * (x4 + x18)) + ((uint64_t)(x14 + x25) * (x2 + x16)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
+ { uint64_t x34 = ((((uint64_t)(x2 + x16) * (x12 + x26)) + (((uint64_t)(x4 + x18) * (x10 + x24)) + (((uint64_t)(x6 + x20) * (x8 + x22)) + (((uint64_t)(x8 + x22) * (x6 + x20)) + (((uint64_t)(x10 + x24) * (x4 + x18)) + ((uint64_t)(x12 + x26) * (x2 + x16))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
+ { uint64_t x35 = ((((uint64_t)(x2 + x16) * (x10 + x24)) + (((uint64_t)(x4 + x18) * (x8 + x22)) + (((uint64_t)(x6 + x20) * (x6 + x20)) + (((uint64_t)(x8 + x22) * (x4 + x18)) + ((uint64_t)(x10 + x24) * (x2 + x16)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+ { uint64_t x36 = ((((uint64_t)(x2 + x16) * (x8 + x22)) + (((uint64_t)(x4 + x18) * (x6 + x20)) + (((uint64_t)(x6 + x20) * (x4 + x18)) + ((uint64_t)(x8 + x22) * (x2 + x16))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+ { uint64_t x37 = ((((uint64_t)(x2 + x16) * (x6 + x20)) + (((uint64_t)(x4 + x18) * (x4 + x18)) + ((uint64_t)(x6 + x20) * (x2 + x16)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+ { uint64_t x38 = ((((uint64_t)(x2 + x16) * (x4 + x18)) + ((uint64_t)(x4 + x18) * (x2 + x16))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+ { uint64_t x39 = (((uint64_t)(x2 + x16) * (x2 + x16)) - ((uint64_t)x2 * x2));
+ { uint64_t x40 = (((((uint64_t)x14 * x14) + ((uint64_t)x25 * x25)) + x34) + x27); /* x40..x52: per coefficient, products of limbs 0..6 plus products of limbs 7..13; x40..x45 also add two of the x27..x39 terms, x47..x52 add one, x46 adds none */
+ { uint64_t x41 = ((((((uint64_t)x12 * x14) + ((uint64_t)x14 * x12)) + (((uint64_t)x26 * x25) + ((uint64_t)x25 * x26))) + x35) + x28);
+ { uint64_t x42 = ((((((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10))) + (((uint64_t)x24 * x25) + (((uint64_t)x26 * x26) + ((uint64_t)x25 * x24)))) + x36) + x29);
+ { uint64_t x43 = ((((((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x14 * x8)))) + (((uint64_t)x22 * x25) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((uint64_t)x25 * x22))))) + x37) + x30);
+ { uint64_t x44 = ((((((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x14 * x6))))) + (((uint64_t)x20 * x25) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((uint64_t)x25 * x20)))))) + x38) + x31);
+ { uint64_t x45 = ((((((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x14 * x4)))))) + (((uint64_t)x18 * x25) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x25 * x18))))))) + x39) + x32);
+ { uint64_t x46 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x16 * x25) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + ((uint64_t)x25 * x16))))))));
+ { uint64_t x47 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + ((uint64_t)x26 * x16))))))) + x27);
+ { uint64_t x48 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + ((uint64_t)x24 * x16)))))) + x28);
+ { uint64_t x49 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((uint64_t)x22 * x16))))) + x29);
+ { uint64_t x50 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + ((uint64_t)x20 * x16)))) + x30);
+ { uint64_t x51 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x16 * x18) + ((uint64_t)x18 * x16))) + x31);
+ { uint64_t x52 = ((((uint64_t)x2 * x2) + ((uint64_t)x16 * x16)) + x32);
+ { uint32_t x53 = (uint32_t) (x46 >> 0x17); /* carry propagation starts here: each step keeps the low 23 bits (& 0x7fffff) and passes the bits above them (>> 0x17) to a later accumulator */
+ { uint32_t x54 = ((uint32_t)x46 & 0x7fffff);
+ { uint32_t x55 = (uint32_t) (x33 >> 0x17);
+ { uint32_t x56 = ((uint32_t)x33 & 0x7fffff);
+ { uint64_t x57 = (((uint64_t)0x800000 * x55) + x56);
+ { uint32_t x58 = (uint32_t) (x57 >> 0x17); /* x58, the carry taken from x57 (itself rebuilt from x33), is added into both x60 and x63 below */
+ { uint32_t x59 = ((uint32_t)x57 & 0x7fffff);
+ { uint64_t x60 = ((x53 + x45) + x58);
+ { uint32_t x61 = (uint32_t) (x60 >> 0x17);
+ { uint32_t x62 = ((uint32_t)x60 & 0x7fffff);
+ { uint64_t x63 = (x52 + x58);
+ { uint32_t x64 = (uint32_t) (x63 >> 0x17);
+ { uint32_t x65 = ((uint32_t)x63 & 0x7fffff);
+ { uint64_t x66 = (x61 + x44);
+ { uint32_t x67 = (uint32_t) (x66 >> 0x17);
+ { uint32_t x68 = ((uint32_t)x66 & 0x7fffff);
+ { uint64_t x69 = (x64 + x51);
+ { uint32_t x70 = (uint32_t) (x69 >> 0x17);
+ { uint32_t x71 = ((uint32_t)x69 & 0x7fffff);
+ { uint64_t x72 = (x67 + x43);
+ { uint32_t x73 = (uint32_t) (x72 >> 0x17);
+ { uint32_t x74 = ((uint32_t)x72 & 0x7fffff);
+ { uint64_t x75 = (x70 + x50);
+ { uint32_t x76 = (uint32_t) (x75 >> 0x17);
+ { uint32_t x77 = ((uint32_t)x75 & 0x7fffff);
+ { uint64_t x78 = (x73 + x42);
+ { uint32_t x79 = (uint32_t) (x78 >> 0x17);
+ { uint32_t x80 = ((uint32_t)x78 & 0x7fffff);
+ { uint64_t x81 = (x76 + x49);
+ { uint32_t x82 = (uint32_t) (x81 >> 0x17);
+ { uint32_t x83 = ((uint32_t)x81 & 0x7fffff);
+ { uint64_t x84 = (x79 + x41);
+ { uint32_t x85 = (uint32_t) (x84 >> 0x17);
+ { uint32_t x86 = ((uint32_t)x84 & 0x7fffff);
+ { uint64_t x87 = (x82 + x48);
+ { uint32_t x88 = (uint32_t) (x87 >> 0x17);
+ { uint32_t x89 = ((uint32_t)x87 & 0x7fffff);
+ { uint64_t x90 = (x85 + x40);
+ { uint32_t x91 = (uint32_t) (x90 >> 0x17);
+ { uint32_t x92 = ((uint32_t)x90 & 0x7fffff);
+ { uint64_t x93 = (x88 + x47);
+ { uint32_t x94 = (uint32_t) (x93 >> 0x17);
+ { uint32_t x95 = ((uint32_t)x93 & 0x7fffff);
+ { uint32_t x96 = (x91 + x59); /* from here on the carry steps are done in uint32_t rather than uint64_t */
+ { uint32_t x97 = (x96 >> 0x17);
+ { uint32_t x98 = (x96 & 0x7fffff);
+ { uint32_t x99 = (x94 + x54);
+ { uint32_t x100 = (x99 >> 0x17);
+ { uint32_t x101 = (x99 & 0x7fffff);
+ { uint32_t x102 = ((0x800000 * x97) + x98);
+ { uint32_t x103 = (x102 >> 0x17); /* x103 is added into both x105 and x108 below */
+ { uint32_t x104 = (x102 & 0x7fffff);
+ { uint32_t x105 = ((x100 + x62) + x103);
+ { uint32_t x106 = (x105 >> 0x17);
+ { uint32_t x107 = (x105 & 0x7fffff);
+ { uint32_t x108 = (x65 + x103);
+ { uint32_t x109 = (x108 >> 0x17);
+ { uint32_t x110 = (x108 & 0x7fffff);
+ out[0] = x110; /* store the 14 result limbs; out[1] and out[8] absorb a last carry (x109, x106) and are not masked again */
+ out[1] = (x109 + x71);
+ out[2] = x77;
+ out[3] = x83;
+ out[4] = x89;
+ out[5] = x95;
+ out[6] = x101;
+ out[7] = x107;
+ out[8] = (x106 + x68);
+ out[9] = x74;
+ out[10] = x80;
+ out[11] = x86;
+ out[12] = x92;
+ out[13] = x104;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e322m2e161m1/freeze.c b/src/Specific/solinas32_2e322m2e161m1/freeze.c
index b157048ca..744989782 100644
--- a/src/Specific/solinas32_2e322m2e161m1/freeze.c
+++ b/src/Specific/solinas32_2e322m2e161m1/freeze.c
@@ -1,25 +1,74 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x28;
-out[1] = uint8_t x29 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffff;;
+static void freeze(uint32_t out[14], const uint32_t in1[14]) { /* Reads 14 limbs from in1 and writes 14 limbs to out. NOTE(review): the "Op (Syntax.SubWithGetBorrow ...)" and "Op (Syntax.AddWithGetCarry ...)" lines below are not C - they look like generator IR that was never lowered to C, so this file cannot compile as printed; confirm against the generator. */
+ { const uint32_t x25 = in1[13]; /* x26 is limb 12 and x25 is limb 13; the remaining names increase with the limb index */
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffff); /* first of 14 chained SubWithGetBorrow ops, one per limb from x2 (in1[0]) to x25 (in1[13]); each takes the previous op's uint8_t result (0x0 here), a limb, and a constant. Presumably a 23-bit subtract-with-borrow - TODO confirm */
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x4, 0x7fffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x6, 0x7fffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x8, 0x7fffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x10, 0x7fffff);
+ { uint32_t x43, uint8_t x44 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x12, 0x7fffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x44, Return x14, 0x7fffff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x16, 0x7ffffe); /* the only limb whose constant is 0x7ffffe instead of 0x7fffff (x16 = in1[7]) */
+ { uint32_t x52, uint8_t x53 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x18, 0x7fffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x20, 0x7fffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x22, 0x7fffff);
+ { uint32_t x61, uint8_t x62 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x24, 0x7fffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x62, Return x26, 0x7fffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x25, 0x7fffff);
+ { uint32_t x69 = (uint32_t)cmovznz(x68, 0x0, 0xffffffff); /* x69 is chosen by cmovznz (not defined in this file) from x68, the uint8_t result of the last subtract op; presumably 0xffffffff when x68 is nonzero and 0 otherwise - TODO confirm */
+ { uint32_t x70 = (x69 & 0x7fffff); /* each x69 & constant below reuses the constant from the matching subtract step and is passed to an AddWithGetCarry op together with that step's uint32_t result and the previous add's uint8_t result */
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x28, Return x70);
+ { uint32_t x74 = (x69 & 0x7fffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x31, Return x74);
+ { uint32_t x78 = (x69 & 0x7fffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x34, Return x78);
+ { uint32_t x82 = (x69 & 0x7fffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x37, Return x82);
+ { uint32_t x86 = (x69 & 0x7fffff);
+ { uint32_t x88, uint8_t x89 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x40, Return x86);
+ { uint32_t x90 = (x69 & 0x7fffff);
+ { uint32_t x92, uint8_t x93 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x89, Return x43, Return x90);
+ { uint32_t x94 = (x69 & 0x7fffff);
+ { uint32_t x96, uint8_t x97 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x93, Return x46, Return x94);
+ { uint32_t x98 = (x69 & 0x7ffffe); /* 0x7ffffe again for the in1[7] position, matching the subtract step that produced x49 */
+ { uint32_t x100, uint8_t x101 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x97, Return x49, Return x98);
+ { uint32_t x102 = (x69 & 0x7fffff);
+ { uint32_t x104, uint8_t x105 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x101, Return x52, Return x102);
+ { uint32_t x106 = (x69 & 0x7fffff);
+ { uint32_t x108, uint8_t x109 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x105, Return x55, Return x106);
+ { uint32_t x110 = (x69 & 0x7fffff);
+ { uint32_t x112, uint8_t x113 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x109, Return x58, Return x110);
+ { uint32_t x114 = (x69 & 0x7fffff);
+ { uint32_t x116, uint8_t x117 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x113, Return x61, Return x114);
+ { uint32_t x118 = (x69 & 0x7fffff);
+ { uint32_t x120, uint8_t x121 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x117, Return x64, Return x118);
+ { uint32_t x122 = (x69 & 0x7fffff);
+ { uint32_t x124, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x121, Return x67, Return x122); /* the uint8_t result of the last add is bound to _ and never read */
+ out[0] = x72; /* out[i] is the uint32_t result of the i-th AddWithGetCarry op */
+ out[1] = x76;
+ out[2] = x80;
+ out[3] = x84;
+ out[4] = x88;
+ out[5] = x92;
+ out[6] = x96;
+ out[7] = x100;
+ out[8] = x104;
+ out[9] = x108;
+ out[10] = x112;
+ out[11] = x116;
+ out[12] = x120;
+ out[13] = x124;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e336m17/femul.c b/src/Specific/solinas32_2e336m17/femul.c
index c7117de97..eb775fe27 100644
--- a/src/Specific/solinas32_2e336m17/femul.c
+++ b/src/Specific/solinas32_2e336m17/femul.c
@@ -1,96 +1,106 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31)
-{ uint64_t x56 = (((uint64_t)x5 * x54) + (((uint64_t)x7 * x55) + (((uint64_t)x9 * x53) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + (((uint64_t)x29 * x33) + ((uint64_t)x28 * x31))))))))))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + (((uint64_t)x27 * x33) + ((uint64_t)x29 * x31))))))))))))) + (0x11 * ((uint64_t)x28 * x54)));
-{ uint64_t x58 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x27 * x31)))))))))))) + (0x11 * (((uint64_t)x29 * x54) + ((uint64_t)x28 * x55))));
-{ uint64_t x59 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + ((uint64_t)x25 * x31))))))))))) + (0x11 * (((uint64_t)x27 * x54) + (((uint64_t)x29 * x55) + ((uint64_t)x28 * x53)))));
-{ uint64_t x60 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x23 * x31)))))))))) + (0x11 * (((uint64_t)x25 * x54) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x28 * x51))))));
-{ uint64_t x61 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x21 * x31))))))))) + (0x11 * (((uint64_t)x23 * x54) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + ((uint64_t)x28 * x49)))))));
-{ uint64_t x62 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((uint64_t)x19 * x31)))))))) + (0x11 * (((uint64_t)x21 * x54) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((uint64_t)x28 * x47))))))));
-{ uint64_t x63 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))) + (0x11 * (((uint64_t)x19 * x54) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((uint64_t)x28 * x45)))))))));
-{ uint64_t x64 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))) + (0x11 * (((uint64_t)x17 * x54) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + ((uint64_t)x28 * x43))))))))));
-{ uint64_t x65 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))) + (0x11 * (((uint64_t)x15 * x54) + (((uint64_t)x17 * x55) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + ((uint64_t)x28 * x41)))))))))));
-{ uint64_t x66 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))) + (0x11 * (((uint64_t)x13 * x54) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((uint64_t)x28 * x39))))))))))));
-{ uint64_t x67 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))) + (0x11 * (((uint64_t)x11 * x54) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((uint64_t)x28 * x37)))))))))))));
-{ uint64_t x68 = ((((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)) + (0x11 * (((uint64_t)x9 * x54) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x28 * x35))))))))))))));
-{ uint64_t x69 = (((uint64_t)x5 * x31) + (0x11 * (((uint64_t)x7 * x54) + (((uint64_t)x9 * x55) + (((uint64_t)x11 * x53) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + (((uint64_t)x29 * x35) + ((uint64_t)x28 * x33)))))))))))))));
-{ uint64_t x70 = (x69 >> 0x18);
-{ uint32_t x71 = ((uint32_t)x69 & 0xffffff);
-{ uint64_t x72 = (x70 + x68);
-{ uint64_t x73 = (x72 >> 0x18);
-{ uint32_t x74 = ((uint32_t)x72 & 0xffffff);
-{ uint64_t x75 = (x73 + x67);
-{ uint64_t x76 = (x75 >> 0x18);
-{ uint32_t x77 = ((uint32_t)x75 & 0xffffff);
-{ uint64_t x78 = (x76 + x66);
-{ uint64_t x79 = (x78 >> 0x18);
-{ uint32_t x80 = ((uint32_t)x78 & 0xffffff);
-{ uint64_t x81 = (x79 + x65);
-{ uint64_t x82 = (x81 >> 0x18);
-{ uint32_t x83 = ((uint32_t)x81 & 0xffffff);
-{ uint64_t x84 = (x82 + x64);
-{ uint64_t x85 = (x84 >> 0x18);
-{ uint32_t x86 = ((uint32_t)x84 & 0xffffff);
-{ uint64_t x87 = (x85 + x63);
-{ uint64_t x88 = (x87 >> 0x18);
-{ uint32_t x89 = ((uint32_t)x87 & 0xffffff);
-{ uint64_t x90 = (x88 + x62);
-{ uint64_t x91 = (x90 >> 0x18);
-{ uint32_t x92 = ((uint32_t)x90 & 0xffffff);
-{ uint64_t x93 = (x91 + x61);
-{ uint64_t x94 = (x93 >> 0x18);
-{ uint32_t x95 = ((uint32_t)x93 & 0xffffff);
-{ uint64_t x96 = (x94 + x60);
-{ uint64_t x97 = (x96 >> 0x18);
-{ uint32_t x98 = ((uint32_t)x96 & 0xffffff);
-{ uint64_t x99 = (x97 + x59);
-{ uint64_t x100 = (x99 >> 0x18);
-{ uint32_t x101 = ((uint32_t)x99 & 0xffffff);
-{ uint64_t x102 = (x100 + x58);
-{ uint64_t x103 = (x102 >> 0x18);
-{ uint32_t x104 = ((uint32_t)x102 & 0xffffff);
-{ uint64_t x105 = (x103 + x57);
-{ uint64_t x106 = (x105 >> 0x18);
-{ uint32_t x107 = ((uint32_t)x105 & 0xffffff);
-{ uint64_t x108 = (x106 + x56);
-{ uint32_t x109 = (uint32_t) (x108 >> 0x18);
-{ uint32_t x110 = ((uint32_t)x108 & 0xffffff);
-{ uint64_t x111 = (x71 + ((uint64_t)0x11 * x109));
-{ uint32_t x112 = (uint32_t) (x111 >> 0x18);
-{ uint32_t x113 = ((uint32_t)x111 & 0xffffff);
-{ uint32_t x114 = (x112 + x74);
-{ uint32_t x115 = (x114 >> 0x18);
-{ uint32_t x116 = (x114 & 0xffffff);
-out[0] = x110;
-out[1] = x107;
-out[2] = x104;
-out[3] = x101;
-out[4] = x98;
-out[5] = x95;
-out[6] = x92;
-out[7] = x89;
-out[8] = x86;
-out[9] = x83;
-out[10] = x80;
-out[11] = x115 + x77;
-out[12] = x116;
-out[13] = x113;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[14];
+static void femul(uint32_t out[14], const uint32_t in1[14], const uint32_t in2[14]) { /* Multiplies the 14-limb values in1 and in2 and writes 14 limbs to out. Carries are taken in 24-bit units (>> 0x18, & 0xffffff). Every input limb is copied to a local before out is written. NOTE(review): presumably arithmetic mod 2^336 - 17, going by the solinas32_2e336m17 path and the 0x11 factor - confirm. */
+ { const uint32_t x28 = in1[13]; /* x5..x27 hold in1 limbs 0..11 in increasing order; x29 is limb 12 and x28 is limb 13 */
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x54 = in2[13]; /* x31..x53 hold in2 limbs 0..11 in increasing order; x55 is limb 12 and x54 is limb 13 */
+ { const uint32_t x55 = in2[12];
+ { const uint32_t x53 = in2[11];
+ { const uint32_t x51 = in2[10];
+ { const uint32_t x49 = in2[9];
+ { const uint32_t x47 = in2[8];
+ { const uint32_t x45 = in2[7];
+ { const uint32_t x43 = in2[6];
+ { const uint32_t x41 = in2[5];
+ { const uint32_t x39 = in2[4];
+ { const uint32_t x37 = in2[3];
+ { const uint32_t x35 = in2[2];
+ { const uint32_t x33 = in2[1];
+ { const uint32_t x31 = in2[0];
+ { uint64_t x56 = (((uint64_t)x5 * x54) + (((uint64_t)x7 * x55) + (((uint64_t)x9 * x53) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + (((uint64_t)x29 * x33) + ((uint64_t)x28 * x31)))))))))))))); /* x56..x69 are the column sums: x56 gathers products whose limb indices sum to 13, down to x69 for index sum 0; products whose index sum is 14 or more go into the column 14 lower, scaled by 0x11 */
+ { uint64_t x57 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + (((uint64_t)x27 * x33) + ((uint64_t)x29 * x31))))))))))))) + (0x11 * ((uint64_t)x28 * x54)));
+ { uint64_t x58 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x27 * x31)))))))))))) + (0x11 * (((uint64_t)x29 * x54) + ((uint64_t)x28 * x55))));
+ { uint64_t x59 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + ((uint64_t)x25 * x31))))))))))) + (0x11 * (((uint64_t)x27 * x54) + (((uint64_t)x29 * x55) + ((uint64_t)x28 * x53)))));
+ { uint64_t x60 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x23 * x31)))))))))) + (0x11 * (((uint64_t)x25 * x54) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x28 * x51))))));
+ { uint64_t x61 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x21 * x31))))))))) + (0x11 * (((uint64_t)x23 * x54) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + ((uint64_t)x28 * x49)))))));
+ { uint64_t x62 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((uint64_t)x19 * x31)))))))) + (0x11 * (((uint64_t)x21 * x54) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((uint64_t)x28 * x47))))))));
+ { uint64_t x63 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))) + (0x11 * (((uint64_t)x19 * x54) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((uint64_t)x28 * x45)))))))));
+ { uint64_t x64 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))) + (0x11 * (((uint64_t)x17 * x54) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + ((uint64_t)x28 * x43))))))))));
+ { uint64_t x65 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))) + (0x11 * (((uint64_t)x15 * x54) + (((uint64_t)x17 * x55) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + ((uint64_t)x28 * x41)))))))))));
+ { uint64_t x66 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))) + (0x11 * (((uint64_t)x13 * x54) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((uint64_t)x28 * x39))))))))))));
+ { uint64_t x67 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))) + (0x11 * (((uint64_t)x11 * x54) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((uint64_t)x28 * x37)))))))))))));
+ { uint64_t x68 = ((((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)) + (0x11 * (((uint64_t)x9 * x54) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x28 * x35))))))))))))));
+ { uint64_t x69 = (((uint64_t)x5 * x31) + (0x11 * (((uint64_t)x7 * x54) + (((uint64_t)x9 * x55) + (((uint64_t)x11 * x53) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + (((uint64_t)x29 * x35) + ((uint64_t)x28 * x33)))))))))))))));
+ { uint64_t x70 = (x69 >> 0x18); /* carry chain from x69 (column 0) up to x108 (column 13): the low 24 bits are kept as the limb and the bits above them are added to the next column */
+ { uint32_t x71 = ((uint32_t)x69 & 0xffffff);
+ { uint64_t x72 = (x70 + x68);
+ { uint64_t x73 = (x72 >> 0x18);
+ { uint32_t x74 = ((uint32_t)x72 & 0xffffff);
+ { uint64_t x75 = (x73 + x67);
+ { uint64_t x76 = (x75 >> 0x18);
+ { uint32_t x77 = ((uint32_t)x75 & 0xffffff);
+ { uint64_t x78 = (x76 + x66);
+ { uint64_t x79 = (x78 >> 0x18);
+ { uint32_t x80 = ((uint32_t)x78 & 0xffffff);
+ { uint64_t x81 = (x79 + x65);
+ { uint64_t x82 = (x81 >> 0x18);
+ { uint32_t x83 = ((uint32_t)x81 & 0xffffff);
+ { uint64_t x84 = (x82 + x64);
+ { uint64_t x85 = (x84 >> 0x18);
+ { uint32_t x86 = ((uint32_t)x84 & 0xffffff);
+ { uint64_t x87 = (x85 + x63);
+ { uint64_t x88 = (x87 >> 0x18);
+ { uint32_t x89 = ((uint32_t)x87 & 0xffffff);
+ { uint64_t x90 = (x88 + x62);
+ { uint64_t x91 = (x90 >> 0x18);
+ { uint32_t x92 = ((uint32_t)x90 & 0xffffff);
+ { uint64_t x93 = (x91 + x61);
+ { uint64_t x94 = (x93 >> 0x18);
+ { uint32_t x95 = ((uint32_t)x93 & 0xffffff);
+ { uint64_t x96 = (x94 + x60);
+ { uint64_t x97 = (x96 >> 0x18);
+ { uint32_t x98 = ((uint32_t)x96 & 0xffffff);
+ { uint64_t x99 = (x97 + x59);
+ { uint64_t x100 = (x99 >> 0x18);
+ { uint32_t x101 = ((uint32_t)x99 & 0xffffff);
+ { uint64_t x102 = (x100 + x58);
+ { uint64_t x103 = (x102 >> 0x18);
+ { uint32_t x104 = ((uint32_t)x102 & 0xffffff);
+ { uint64_t x105 = (x103 + x57);
+ { uint64_t x106 = (x105 >> 0x18);
+ { uint32_t x107 = ((uint32_t)x105 & 0xffffff);
+ { uint64_t x108 = (x106 + x56);
+ { uint32_t x109 = (uint32_t) (x108 >> 0x18);
+ { uint32_t x110 = ((uint32_t)x108 & 0xffffff);
+ { uint64_t x111 = (x71 + ((uint64_t)0x11 * x109)); /* the carry out of the top column (x109) is scaled by 0x11 and added back onto the lowest limb x71, then carried through two more limbs */
+ { uint32_t x112 = (uint32_t) (x111 >> 0x18);
+ { uint32_t x113 = ((uint32_t)x111 & 0xffffff);
+ { uint32_t x114 = (x112 + x74);
+ { uint32_t x115 = (x114 >> 0x18);
+ { uint32_t x116 = (x114 & 0xffffff);
+ out[0] = x113; /* store the 14 result limbs, lowest first; out[2] absorbs the last carry x115 and is not masked again */
+ out[1] = x116;
+ out[2] = (x115 + x77);
+ out[3] = x80;
+ out[4] = x83;
+ out[5] = x86;
+ out[6] = x89;
+ out[7] = x92;
+ out[8] = x95;
+ out[9] = x98;
+ out[10] = x101;
+ out[11] = x104;
+ out[12] = x107;
+ out[13] = x110;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e336m17/fesquare.c b/src/Specific/solinas32_2e336m17/fesquare.c
index 5add565ee..5b42995f1 100644
--- a/src/Specific/solinas32_2e336m17/fesquare.c
+++ b/src/Specific/solinas32_2e336m17/fesquare.c
@@ -1,96 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x27 = (((uint64_t)x2 * x25) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x25 * x2))))))))))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x26) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x26 * x2))))))))))))) + (0x11 * ((uint64_t)x25 * x25)));
-{ uint64_t x29 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x11 * (((uint64_t)x26 * x25) + ((uint64_t)x25 * x26))));
-{ uint64_t x30 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x11 * (((uint64_t)x24 * x25) + (((uint64_t)x26 * x26) + ((uint64_t)x25 * x24)))));
-{ uint64_t x31 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x11 * (((uint64_t)x22 * x25) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((uint64_t)x25 * x22))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x11 * (((uint64_t)x20 * x25) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((uint64_t)x25 * x20)))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * (((uint64_t)x18 * x25) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x25 * x18))))))));
-{ uint64_t x34 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x11 * (((uint64_t)x16 * x25) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + ((uint64_t)x25 * x16)))))))));
-{ uint64_t x35 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x25) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((uint64_t)x25 * x14))))))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x25) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((uint64_t)x25 * x12)))))))))));
-{ uint64_t x37 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x11 * (((uint64_t)x10 * x25) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + ((uint64_t)x25 * x10))))))))))));
-{ uint64_t x38 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x25) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + ((uint64_t)x25 * x8)))))))))))));
-{ uint64_t x39 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x25) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((uint64_t)x25 * x6))))))))))))));
-{ uint64_t x40 = (((uint64_t)x2 * x2) + (0x11 * (((uint64_t)x4 * x25) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((uint64_t)x25 * x4)))))))))))))));
-{ uint64_t x41 = (x40 >> 0x18);
-{ uint32_t x42 = ((uint32_t)x40 & 0xffffff);
-{ uint64_t x43 = (x41 + x39);
-{ uint64_t x44 = (x43 >> 0x18);
-{ uint32_t x45 = ((uint32_t)x43 & 0xffffff);
-{ uint64_t x46 = (x44 + x38);
-{ uint64_t x47 = (x46 >> 0x18);
-{ uint32_t x48 = ((uint32_t)x46 & 0xffffff);
-{ uint64_t x49 = (x47 + x37);
-{ uint64_t x50 = (x49 >> 0x18);
-{ uint32_t x51 = ((uint32_t)x49 & 0xffffff);
-{ uint64_t x52 = (x50 + x36);
-{ uint64_t x53 = (x52 >> 0x18);
-{ uint32_t x54 = ((uint32_t)x52 & 0xffffff);
-{ uint64_t x55 = (x53 + x35);
-{ uint64_t x56 = (x55 >> 0x18);
-{ uint32_t x57 = ((uint32_t)x55 & 0xffffff);
-{ uint64_t x58 = (x56 + x34);
-{ uint64_t x59 = (x58 >> 0x18);
-{ uint32_t x60 = ((uint32_t)x58 & 0xffffff);
-{ uint64_t x61 = (x59 + x33);
-{ uint64_t x62 = (x61 >> 0x18);
-{ uint32_t x63 = ((uint32_t)x61 & 0xffffff);
-{ uint64_t x64 = (x62 + x32);
-{ uint64_t x65 = (x64 >> 0x18);
-{ uint32_t x66 = ((uint32_t)x64 & 0xffffff);
-{ uint64_t x67 = (x65 + x31);
-{ uint64_t x68 = (x67 >> 0x18);
-{ uint32_t x69 = ((uint32_t)x67 & 0xffffff);
-{ uint64_t x70 = (x68 + x30);
-{ uint64_t x71 = (x70 >> 0x18);
-{ uint32_t x72 = ((uint32_t)x70 & 0xffffff);
-{ uint64_t x73 = (x71 + x29);
-{ uint64_t x74 = (x73 >> 0x18);
-{ uint32_t x75 = ((uint32_t)x73 & 0xffffff);
-{ uint64_t x76 = (x74 + x28);
-{ uint64_t x77 = (x76 >> 0x18);
-{ uint32_t x78 = ((uint32_t)x76 & 0xffffff);
-{ uint64_t x79 = (x77 + x27);
-{ uint32_t x80 = (uint32_t) (x79 >> 0x18);
-{ uint32_t x81 = ((uint32_t)x79 & 0xffffff);
-{ uint64_t x82 = (x42 + ((uint64_t)0x11 * x80));
-{ uint32_t x83 = (uint32_t) (x82 >> 0x18);
-{ uint32_t x84 = ((uint32_t)x82 & 0xffffff);
-{ uint32_t x85 = (x83 + x45);
-{ uint32_t x86 = (x85 >> 0x18);
-{ uint32_t x87 = (x85 & 0xffffff);
-out[0] = x81;
-out[1] = x78;
-out[2] = x75;
-out[3] = x72;
-out[4] = x69;
-out[5] = x66;
-out[6] = x63;
-out[7] = x60;
-out[8] = x57;
-out[9] = x54;
-out[10] = x51;
-out[11] = x86 + x48;
-out[12] = x87;
-out[13] = x84;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[14];
+static void fesquare(uint32_t out[14], const uint32_t in1[14]) {
+ { const uint32_t x25 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x27 = (((uint64_t)x2 * x25) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x25 * x2))))))))))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x26) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x26 * x2))))))))))))) + (0x11 * ((uint64_t)x25 * x25)));
+ { uint64_t x29 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x11 * (((uint64_t)x26 * x25) + ((uint64_t)x25 * x26))));
+ { uint64_t x30 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x11 * (((uint64_t)x24 * x25) + (((uint64_t)x26 * x26) + ((uint64_t)x25 * x24)))));
+ { uint64_t x31 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x11 * (((uint64_t)x22 * x25) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((uint64_t)x25 * x22))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x11 * (((uint64_t)x20 * x25) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((uint64_t)x25 * x20)))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * (((uint64_t)x18 * x25) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x25 * x18))))))));
+ { uint64_t x34 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x11 * (((uint64_t)x16 * x25) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + ((uint64_t)x25 * x16)))))))));
+ { uint64_t x35 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x25) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((uint64_t)x25 * x14))))))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x25) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((uint64_t)x25 * x12)))))))))));
+ { uint64_t x37 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x11 * (((uint64_t)x10 * x25) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + ((uint64_t)x25 * x10))))))))))));
+ { uint64_t x38 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x25) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + ((uint64_t)x25 * x8)))))))))))));
+ { uint64_t x39 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x25) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((uint64_t)x25 * x6))))))))))))));
+ { uint64_t x40 = (((uint64_t)x2 * x2) + (0x11 * (((uint64_t)x4 * x25) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((uint64_t)x25 * x4)))))))))))))));
+ { uint64_t x41 = (x40 >> 0x18);
+ { uint32_t x42 = ((uint32_t)x40 & 0xffffff);
+ { uint64_t x43 = (x41 + x39);
+ { uint64_t x44 = (x43 >> 0x18);
+ { uint32_t x45 = ((uint32_t)x43 & 0xffffff);
+ { uint64_t x46 = (x44 + x38);
+ { uint64_t x47 = (x46 >> 0x18);
+ { uint32_t x48 = ((uint32_t)x46 & 0xffffff);
+ { uint64_t x49 = (x47 + x37);
+ { uint64_t x50 = (x49 >> 0x18);
+ { uint32_t x51 = ((uint32_t)x49 & 0xffffff);
+ { uint64_t x52 = (x50 + x36);
+ { uint64_t x53 = (x52 >> 0x18);
+ { uint32_t x54 = ((uint32_t)x52 & 0xffffff);
+ { uint64_t x55 = (x53 + x35);
+ { uint64_t x56 = (x55 >> 0x18);
+ { uint32_t x57 = ((uint32_t)x55 & 0xffffff);
+ { uint64_t x58 = (x56 + x34);
+ { uint64_t x59 = (x58 >> 0x18);
+ { uint32_t x60 = ((uint32_t)x58 & 0xffffff);
+ { uint64_t x61 = (x59 + x33);
+ { uint64_t x62 = (x61 >> 0x18);
+ { uint32_t x63 = ((uint32_t)x61 & 0xffffff);
+ { uint64_t x64 = (x62 + x32);
+ { uint64_t x65 = (x64 >> 0x18);
+ { uint32_t x66 = ((uint32_t)x64 & 0xffffff);
+ { uint64_t x67 = (x65 + x31);
+ { uint64_t x68 = (x67 >> 0x18);
+ { uint32_t x69 = ((uint32_t)x67 & 0xffffff);
+ { uint64_t x70 = (x68 + x30);
+ { uint64_t x71 = (x70 >> 0x18);
+ { uint32_t x72 = ((uint32_t)x70 & 0xffffff);
+ { uint64_t x73 = (x71 + x29);
+ { uint64_t x74 = (x73 >> 0x18);
+ { uint32_t x75 = ((uint32_t)x73 & 0xffffff);
+ { uint64_t x76 = (x74 + x28);
+ { uint64_t x77 = (x76 >> 0x18);
+ { uint32_t x78 = ((uint32_t)x76 & 0xffffff);
+ { uint64_t x79 = (x77 + x27);
+ { uint32_t x80 = (uint32_t) (x79 >> 0x18);
+ { uint32_t x81 = ((uint32_t)x79 & 0xffffff);
+ { uint64_t x82 = (x42 + ((uint64_t)0x11 * x80));
+ { uint32_t x83 = (uint32_t) (x82 >> 0x18);
+ { uint32_t x84 = ((uint32_t)x82 & 0xffffff);
+ { uint32_t x85 = (x83 + x45);
+ { uint32_t x86 = (x85 >> 0x18);
+ { uint32_t x87 = (x85 & 0xffffff);
+ out[0] = x84;
+ out[1] = x87;
+ out[2] = (x86 + x48);
+ out[3] = x51;
+ out[4] = x54;
+ out[5] = x57;
+ out[6] = x60;
+ out[7] = x63;
+ out[8] = x66;
+ out[9] = x69;
+ out[10] = x72;
+ out[11] = x75;
+ out[12] = x78;
+ out[13] = x81;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e336m17/freeze.c b/src/Specific/solinas32_2e336m17/freeze.c
index c6b0c0494..6e1ed8a8f 100644
--- a/src/Specific/solinas32_2e336m17/freeze.c
+++ b/src/Specific/solinas32_2e336m17/freeze.c
@@ -1,25 +1,74 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x28;
-out[1] = uint8_t x29 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffef;;
+static void freeze(uint32_t out[14], const uint32_t in1[14]) {
+ { const uint32_t x25 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffef);
+ { uint32_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x29, Return x4, 0xffffff);
+ { uint32_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x32, Return x6, 0xffffff);
+ { uint32_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x35, Return x8, 0xffffff);
+ { uint32_t x40, uint8_t x41 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x38, Return x10, 0xffffff);
+ { uint32_t x43, uint8_t x44 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x41, Return x12, 0xffffff);
+ { uint32_t x46, uint8_t x47 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x44, Return x14, 0xffffff);
+ { uint32_t x49, uint8_t x50 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x47, Return x16, 0xffffff);
+ { uint32_t x52, uint8_t x53 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x50, Return x18, 0xffffff);
+ { uint32_t x55, uint8_t x56 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x53, Return x20, 0xffffff);
+ { uint32_t x58, uint8_t x59 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x56, Return x22, 0xffffff);
+ { uint32_t x61, uint8_t x62 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x59, Return x24, 0xffffff);
+ { uint32_t x64, uint8_t x65 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x62, Return x26, 0xffffff);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x65, Return x25, 0xffffff);
+ { uint32_t x69 = (uint32_t)cmovznz(x68, 0x0, 0xffffffff);
+ { uint32_t x70 = (x69 & 0xffffef);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x28, Return x70);
+ { uint32_t x74 = (x69 & 0xffffff);
+ { uint32_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x31, Return x74);
+ { uint32_t x78 = (x69 & 0xffffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x77, Return x34, Return x78);
+ { uint32_t x82 = (x69 & 0xffffff);
+ { uint32_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x37, Return x82);
+ { uint32_t x86 = (x69 & 0xffffff);
+ { uint32_t x88, uint8_t x89 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x40, Return x86);
+ { uint32_t x90 = (x69 & 0xffffff);
+ { uint32_t x92, uint8_t x93 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x89, Return x43, Return x90);
+ { uint32_t x94 = (x69 & 0xffffff);
+ { uint32_t x96, uint8_t x97 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x93, Return x46, Return x94);
+ { uint32_t x98 = (x69 & 0xffffff);
+ { uint32_t x100, uint8_t x101 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x97, Return x49, Return x98);
+ { uint32_t x102 = (x69 & 0xffffff);
+ { uint32_t x104, uint8_t x105 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x101, Return x52, Return x102);
+ { uint32_t x106 = (x69 & 0xffffff);
+ { uint32_t x108, uint8_t x109 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x105, Return x55, Return x106);
+ { uint32_t x110 = (x69 & 0xffffff);
+ { uint32_t x112, uint8_t x113 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x109, Return x58, Return x110);
+ { uint32_t x114 = (x69 & 0xffffff);
+ { uint32_t x116, uint8_t x117 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x113, Return x61, Return x114);
+ { uint32_t x118 = (x69 & 0xffffff);
+ { uint32_t x120, uint8_t x121 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x117, Return x64, Return x118);
+ { uint32_t x122 = (x69 & 0xffffff);
+ { uint32_t x124, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x121, Return x67, Return x122);
+ out[0] = x72;
+ out[1] = x76;
+ out[2] = x80;
+ out[3] = x84;
+ out[4] = x88;
+ out[5] = x92;
+ out[6] = x96;
+ out[7] = x100;
+ out[8] = x104;
+ out[9] = x108;
+ out[10] = x112;
+ out[11] = x116;
+ out[12] = x120;
+ out[13] = x124;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e336m3/femul.c b/src/Specific/solinas32_2e336m3/femul.c
index e1eb4f061..4d7bf5a52 100644
--- a/src/Specific/solinas32_2e336m3/femul.c
+++ b/src/Specific/solinas32_2e336m3/femul.c
@@ -1,86 +1,92 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27)
-{ uint64_t x48 = (((uint64_t)x5 * x46) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + (((uint64_t)x25 * x29) + ((uint64_t)x24 * x27))))))))))));
-{ uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + (0x3 * ((uint64_t)x24 * x46)));
-{ uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x3 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
-{ uint64_t x51 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x21 * x27))))))))) + (0x3 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
-{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x3 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((uint64_t)x17 * x27))))))) + (0x3 * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
-{ ℤ x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) +ℤ (0x3 * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
-{ ℤ x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) +ℤ (0x3 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
-{ ℤ x56 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + ((uint64_t)x11 * x27)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
-{ ℤ x57 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + ((uint64_t)x9 * x27))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
-{ ℤ x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
-{ ℤ x59 = (((uint64_t)x5 * x27) +ℤ (0x3 *ℤ (((uint64_t)x7 * x46) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + (((uint64_t)x25 * x31) + ((uint64_t)x24 * x29)))))))))))));
-{ uint64_t x60 = (x59 >> 0x1c);
-{ uint32_t x61 = (x59 & 0xfffffff);
-{ ℤ x62 = (x60 +ℤ x58);
-{ uint64_t x63 = (x62 >> 0x1c);
-{ uint32_t x64 = (x62 & 0xfffffff);
-{ ℤ x65 = (x63 +ℤ x57);
-{ uint64_t x66 = (x65 >> 0x1c);
-{ uint32_t x67 = (x65 & 0xfffffff);
-{ ℤ x68 = (x66 +ℤ x56);
-{ uint64_t x69 = (x68 >> 0x1c);
-{ uint32_t x70 = (x68 & 0xfffffff);
-{ ℤ x71 = (x69 +ℤ x55);
-{ uint64_t x72 = (x71 >> 0x1c);
-{ uint32_t x73 = (x71 & 0xfffffff);
-{ ℤ x74 = (x72 +ℤ x54);
-{ uint64_t x75 = (x74 >> 0x1c);
-{ uint32_t x76 = (x74 & 0xfffffff);
-{ uint64_t x77 = (x75 + x53);
-{ uint64_t x78 = (x77 >> 0x1c);
-{ uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
-{ uint64_t x80 = (x78 + x52);
-{ uint64_t x81 = (x80 >> 0x1c);
-{ uint32_t x82 = ((uint32_t)x80 & 0xfffffff);
-{ uint64_t x83 = (x81 + x51);
-{ uint64_t x84 = (x83 >> 0x1c);
-{ uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
-{ uint64_t x86 = (x84 + x50);
-{ uint64_t x87 = (x86 >> 0x1c);
-{ uint32_t x88 = ((uint32_t)x86 & 0xfffffff);
-{ uint64_t x89 = (x87 + x49);
-{ uint64_t x90 = (x89 >> 0x1c);
-{ uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
-{ uint64_t x92 = (x90 + x48);
-{ uint64_t x93 = (x92 >> 0x1c);
-{ uint32_t x94 = ((uint32_t)x92 & 0xfffffff);
-{ uint64_t x95 = (x61 + (0x3 * x93));
-{ uint32_t x96 = (uint32_t) (x95 >> 0x1c);
-{ uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
-{ uint32_t x98 = (x96 + x64);
-{ uint32_t x99 = (x98 >> 0x1c);
-{ uint32_t x100 = (x98 & 0xfffffff);
-out[0] = x94;
-out[1] = x91;
-out[2] = x88;
-out[3] = x85;
-out[4] = x82;
-out[5] = x79;
-out[6] = x76;
-out[7] = x73;
-out[8] = x70;
-out[9] = x99 + x67;
-out[10] = x100;
-out[11] = x97;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void femul(uint32_t out[12], const uint32_t in1[12], const uint32_t in2[12]) {
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x46 = in2[11];
+ { const uint32_t x47 = in2[10];
+ { const uint32_t x45 = in2[9];
+ { const uint32_t x43 = in2[8];
+ { const uint32_t x41 = in2[7];
+ { const uint32_t x39 = in2[6];
+ { const uint32_t x37 = in2[5];
+ { const uint32_t x35 = in2[4];
+ { const uint32_t x33 = in2[3];
+ { const uint32_t x31 = in2[2];
+ { const uint32_t x29 = in2[1];
+ { const uint32_t x27 = in2[0];
+ { uint64_t x48 = (((uint64_t)x5 * x46) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + (((uint64_t)x25 * x29) + ((uint64_t)x24 * x27))))))))))));
+ { uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + (0x3 * ((uint64_t)x24 * x46)));
+ { uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x3 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+ { uint64_t x51 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x21 * x27))))))))) + (0x3 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
+ { uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x3 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+ { uint64_t x53 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((uint64_t)x17 * x27))))))) + (0x3 * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
+ { ℤ x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) +ℤ (0x3 * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
+ { ℤ x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) +ℤ (0x3 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+ { ℤ x56 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + ((uint64_t)x11 * x27)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
+ { ℤ x57 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + ((uint64_t)x9 * x27))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+ { ℤ x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+ { ℤ x59 = (((uint64_t)x5 * x27) +ℤ (0x3 *ℤ (((uint64_t)x7 * x46) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + (((uint64_t)x25 * x31) + ((uint64_t)x24 * x29)))))))))))));
+ { uint64_t x60 = (x59 >> 0x1c);
+ { uint32_t x61 = (x59 & 0xfffffff);
+ { ℤ x62 = (x60 +ℤ x58);
+ { uint64_t x63 = (x62 >> 0x1c);
+ { uint32_t x64 = (x62 & 0xfffffff);
+ { ℤ x65 = (x63 +ℤ x57);
+ { uint64_t x66 = (x65 >> 0x1c);
+ { uint32_t x67 = (x65 & 0xfffffff);
+ { ℤ x68 = (x66 +ℤ x56);
+ { uint64_t x69 = (x68 >> 0x1c);
+ { uint32_t x70 = (x68 & 0xfffffff);
+ { ℤ x71 = (x69 +ℤ x55);
+ { uint64_t x72 = (x71 >> 0x1c);
+ { uint32_t x73 = (x71 & 0xfffffff);
+ { ℤ x74 = (x72 +ℤ x54);
+ { uint64_t x75 = (x74 >> 0x1c);
+ { uint32_t x76 = (x74 & 0xfffffff);
+ { uint64_t x77 = (x75 + x53);
+ { uint64_t x78 = (x77 >> 0x1c);
+ { uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
+ { uint64_t x80 = (x78 + x52);
+ { uint64_t x81 = (x80 >> 0x1c);
+ { uint32_t x82 = ((uint32_t)x80 & 0xfffffff);
+ { uint64_t x83 = (x81 + x51);
+ { uint64_t x84 = (x83 >> 0x1c);
+ { uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
+ { uint64_t x86 = (x84 + x50);
+ { uint64_t x87 = (x86 >> 0x1c);
+ { uint32_t x88 = ((uint32_t)x86 & 0xfffffff);
+ { uint64_t x89 = (x87 + x49);
+ { uint64_t x90 = (x89 >> 0x1c);
+ { uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
+ { uint64_t x92 = (x90 + x48);
+ { uint64_t x93 = (x92 >> 0x1c);
+ { uint32_t x94 = ((uint32_t)x92 & 0xfffffff);
+ { uint64_t x95 = (x61 + (0x3 * x93));
+ { uint32_t x96 = (uint32_t) (x95 >> 0x1c);
+ { uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
+ { uint32_t x98 = (x96 + x64);
+ { uint32_t x99 = (x98 >> 0x1c);
+ { uint32_t x100 = (x98 & 0xfffffff);
+ out[0] = x97;
+ out[1] = x100;
+ out[2] = (x99 + x67);
+ out[3] = x70;
+ out[4] = x73;
+ out[5] = x76;
+ out[6] = x79;
+ out[7] = x82;
+ out[8] = x85;
+ out[9] = x88;
+ out[10] = x91;
+ out[11] = x94;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e336m3/fesquare.c b/src/Specific/solinas32_2e336m3/fesquare.c
index 12ee7761a..70f7fecb8 100644
--- a/src/Specific/solinas32_2e336m3/fesquare.c
+++ b/src/Specific/solinas32_2e336m3/fesquare.c
@@ -1,86 +1,80 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x23 = (((uint64_t)x2 * x21) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x21 * x2))))))))))));
-{ uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x3 * ((uint64_t)x21 * x21)));
-{ uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
-{ uint64_t x26 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x3 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
-{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
-{ uint64_t x28 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x3 * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
-{ ℤ x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
-{ ℤ x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0x3 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
-{ ℤ x31 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
-{ ℤ x32 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
-{ ℤ x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
-{ ℤ x34 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x21) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + ((uint64_t)x21 * x4)))))))))))));
-{ uint64_t x35 = (x34 >> 0x1c);
-{ uint32_t x36 = (x34 & 0xfffffff);
-{ ℤ x37 = (x35 +ℤ x33);
-{ uint64_t x38 = (x37 >> 0x1c);
-{ uint32_t x39 = (x37 & 0xfffffff);
-{ ℤ x40 = (x38 +ℤ x32);
-{ uint64_t x41 = (x40 >> 0x1c);
-{ uint32_t x42 = (x40 & 0xfffffff);
-{ ℤ x43 = (x41 +ℤ x31);
-{ uint64_t x44 = (x43 >> 0x1c);
-{ uint32_t x45 = (x43 & 0xfffffff);
-{ ℤ x46 = (x44 +ℤ x30);
-{ uint64_t x47 = (x46 >> 0x1c);
-{ uint32_t x48 = (x46 & 0xfffffff);
-{ ℤ x49 = (x47 +ℤ x29);
-{ uint64_t x50 = (x49 >> 0x1c);
-{ uint32_t x51 = (x49 & 0xfffffff);
-{ uint64_t x52 = (x50 + x28);
-{ uint64_t x53 = (x52 >> 0x1c);
-{ uint32_t x54 = ((uint32_t)x52 & 0xfffffff);
-{ uint64_t x55 = (x53 + x27);
-{ uint64_t x56 = (x55 >> 0x1c);
-{ uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
-{ uint64_t x58 = (x56 + x26);
-{ uint64_t x59 = (x58 >> 0x1c);
-{ uint32_t x60 = ((uint32_t)x58 & 0xfffffff);
-{ uint64_t x61 = (x59 + x25);
-{ uint64_t x62 = (x61 >> 0x1c);
-{ uint32_t x63 = ((uint32_t)x61 & 0xfffffff);
-{ uint64_t x64 = (x62 + x24);
-{ uint64_t x65 = (x64 >> 0x1c);
-{ uint32_t x66 = ((uint32_t)x64 & 0xfffffff);
-{ uint64_t x67 = (x65 + x23);
-{ uint64_t x68 = (x67 >> 0x1c);
-{ uint32_t x69 = ((uint32_t)x67 & 0xfffffff);
-{ uint64_t x70 = (x36 + (0x3 * x68));
-{ uint32_t x71 = (uint32_t) (x70 >> 0x1c);
-{ uint32_t x72 = ((uint32_t)x70 & 0xfffffff);
-{ uint32_t x73 = (x71 + x39);
-{ uint32_t x74 = (x73 >> 0x1c);
-{ uint32_t x75 = (x73 & 0xfffffff);
-out[0] = x69;
-out[1] = x66;
-out[2] = x63;
-out[3] = x60;
-out[4] = x57;
-out[5] = x54;
-out[6] = x51;
-out[7] = x48;
-out[8] = x45;
-out[9] = x74 + x42;
-out[10] = x75;
-out[11] = x72;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[12];
+static void fesquare(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x23 = (((uint64_t)x2 * x21) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x21 * x2))))))))))));
+ { uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x3 * ((uint64_t)x21 * x21)));
+ { uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+ { uint64_t x26 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x3 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
+ { uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+ { uint64_t x28 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x3 * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
+ { ℤ x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
+ { ℤ x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0x3 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+ { ℤ x31 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
+ { ℤ x32 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+ { ℤ x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+ { ℤ x34 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x21) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + ((uint64_t)x21 * x4)))))))))))));
+ { uint64_t x35 = (x34 >> 0x1c);
+ { uint32_t x36 = (x34 & 0xfffffff);
+ { ℤ x37 = (x35 +ℤ x33);
+ { uint64_t x38 = (x37 >> 0x1c);
+ { uint32_t x39 = (x37 & 0xfffffff);
+ { ℤ x40 = (x38 +ℤ x32);
+ { uint64_t x41 = (x40 >> 0x1c);
+ { uint32_t x42 = (x40 & 0xfffffff);
+ { ℤ x43 = (x41 +ℤ x31);
+ { uint64_t x44 = (x43 >> 0x1c);
+ { uint32_t x45 = (x43 & 0xfffffff);
+ { ℤ x46 = (x44 +ℤ x30);
+ { uint64_t x47 = (x46 >> 0x1c);
+ { uint32_t x48 = (x46 & 0xfffffff);
+ { ℤ x49 = (x47 +ℤ x29);
+ { uint64_t x50 = (x49 >> 0x1c);
+ { uint32_t x51 = (x49 & 0xfffffff);
+ { uint64_t x52 = (x50 + x28);
+ { uint64_t x53 = (x52 >> 0x1c);
+ { uint32_t x54 = ((uint32_t)x52 & 0xfffffff);
+ { uint64_t x55 = (x53 + x27);
+ { uint64_t x56 = (x55 >> 0x1c);
+ { uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
+ { uint64_t x58 = (x56 + x26);
+ { uint64_t x59 = (x58 >> 0x1c);
+ { uint32_t x60 = ((uint32_t)x58 & 0xfffffff);
+ { uint64_t x61 = (x59 + x25);
+ { uint64_t x62 = (x61 >> 0x1c);
+ { uint32_t x63 = ((uint32_t)x61 & 0xfffffff);
+ { uint64_t x64 = (x62 + x24);
+ { uint64_t x65 = (x64 >> 0x1c);
+ { uint32_t x66 = ((uint32_t)x64 & 0xfffffff);
+ { uint64_t x67 = (x65 + x23);
+ { uint64_t x68 = (x67 >> 0x1c);
+ { uint32_t x69 = ((uint32_t)x67 & 0xfffffff);
+ { uint64_t x70 = (x36 + (0x3 * x68));
+ { uint32_t x71 = (uint32_t) (x70 >> 0x1c);
+ { uint32_t x72 = ((uint32_t)x70 & 0xfffffff);
+ { uint32_t x73 = (x71 + x39);
+ { uint32_t x74 = (x73 >> 0x1c);
+ { uint32_t x75 = (x73 & 0xfffffff);
+ out[0] = x72;
+ out[1] = x75;
+ out[2] = (x74 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ out[7] = x57;
+ out[8] = x60;
+ out[9] = x63;
+ out[10] = x66;
+ out[11] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e336m3/freeze.c b/src/Specific/solinas32_2e336m3/freeze.c
index f90fdb50f..7a8107542 100644
--- a/src/Specific/solinas32_2e336m3/freeze.c
+++ b/src/Specific/solinas32_2e336m3/freeze.c
@@ -1,25 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x24;
-out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffd;;
+static void freeze(uint32_t out[12], const uint32_t in1[12]) {
+ { const uint32_t x21 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffd);
+ { uint32_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x25, Return x4, 0xfffffff);
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x28, Return x6, 0xfffffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x8, 0xfffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x10, 0xfffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x12, 0xfffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x14, 0xfffffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x16, 0xfffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x18, 0xfffffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x20, 0xfffffff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x22, 0xfffffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x21, 0xfffffff);
+ { uint32_t x59 = (uint32_t)cmovznz(x58, 0x0, 0xffffffff);
+ { uint32_t x60 = (x59 & 0xffffffd);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x24, Return x60);
+ { uint32_t x64 = (x59 & 0xfffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x27, Return x64);
+ { uint32_t x68 = (x59 & 0xfffffff);
+ { uint32_t x70, uint8_t x71 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x30, Return x68);
+ { uint32_t x72 = (x59 & 0xfffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x71, Return x33, Return x72);
+ { uint32_t x76 = (x59 & 0xfffffff);
+ { uint32_t x78, uint8_t x79 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x36, Return x76);
+ { uint32_t x80 = (x59 & 0xfffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x39, Return x80);
+ { uint32_t x84 = (x59 & 0xfffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x42, Return x84);
+ { uint32_t x88 = (x59 & 0xfffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x45, Return x88);
+ { uint32_t x92 = (x59 & 0xfffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x48, Return x92);
+ { uint32_t x96 = (x59 & 0xfffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x51, Return x96);
+ { uint32_t x100 = (x59 & 0xfffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x54, Return x100);
+ { uint32_t x104 = (x59 & 0xfffffff);
+ { uint32_t x106, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x57, Return x104);
+ out[0] = x62;
+ out[1] = x66;
+ out[2] = x70;
+ out[3] = x74;
+ out[4] = x78;
+ out[5] = x82;
+ out[6] = x86;
+ out[7] = x90;
+ out[8] = x94;
+ out[9] = x98;
+ out[10] = x102;
+ out[11] = x106;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e338m15/femul.c b/src/Specific/solinas32_2e338m15/femul.c
index f89890ffb..18e92ef65 100644
--- a/src/Specific/solinas32_2e338m15/femul.c
+++ b/src/Specific/solinas32_2e338m15/femul.c
@@ -1,91 +1,99 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29)
-{ uint64_t x52 = (((uint64_t)x5 * x50) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + (((uint64_t)x27 * x31) + ((uint64_t)x26 * x29)))))))))))));
-{ uint64_t x53 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + (((uint64_t)x25 * x31) + ((uint64_t)x27 * x29)))))))))))) + (0xf * ((uint64_t)x26 * x50)));
-{ uint64_t x54 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((uint64_t)x25 * x29))))))))))) + (0xf * (((uint64_t)x27 * x50) + ((uint64_t)x26 * x51))));
-{ uint64_t x55 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x23 * x29)))))))))) + (0xf * (((uint64_t)x25 * x50) + (((uint64_t)x27 * x51) + ((uint64_t)x26 * x49)))));
-{ uint64_t x56 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x21 * x29))))))))) + (0xf * (((uint64_t)x23 * x50) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + ((uint64_t)x26 * x47))))));
-{ uint64_t x57 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((uint64_t)x19 * x29)))))))) + (0xf * (((uint64_t)x21 * x50) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + ((uint64_t)x26 * x45)))))));
-{ uint64_t x58 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + ((uint64_t)x17 * x29))))))) + (0xf * (((uint64_t)x19 * x50) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + ((uint64_t)x26 * x43))))))));
-{ uint64_t x59 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((uint64_t)x15 * x29)))))) + (0xf * (((uint64_t)x17 * x50) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + ((uint64_t)x26 * x41)))))))));
-{ uint64_t x60 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((uint64_t)x13 * x29))))) + (0xf * (((uint64_t)x15 * x50) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + ((uint64_t)x26 * x39))))))))));
-{ uint64_t x61 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + ((uint64_t)x11 * x29)))) + (0xf * (((uint64_t)x13 * x50) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + ((uint64_t)x26 * x37)))))))))));
-{ uint64_t x62 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + ((uint64_t)x9 * x29))) + (0xf * (((uint64_t)x11 * x50) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + ((uint64_t)x26 * x35))))))))))));
-{ uint64_t x63 = ((((uint64_t)x5 * x31) + ((uint64_t)x7 * x29)) + (0xf * (((uint64_t)x9 * x50) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + ((uint64_t)x26 * x33)))))))))))));
-{ uint64_t x64 = (((uint64_t)x5 * x29) + (0xf * (((uint64_t)x7 * x50) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + (((uint64_t)x27 * x33) + ((uint64_t)x26 * x31))))))))))))));
-{ uint64_t x65 = (x64 >> 0x1a);
-{ uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
-{ uint64_t x67 = (x65 + x63);
-{ uint64_t x68 = (x67 >> 0x1a);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
-{ uint64_t x70 = (x68 + x62);
-{ uint64_t x71 = (x70 >> 0x1a);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
-{ uint64_t x73 = (x71 + x61);
-{ uint64_t x74 = (x73 >> 0x1a);
-{ uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
-{ uint64_t x76 = (x74 + x60);
-{ uint64_t x77 = (x76 >> 0x1a);
-{ uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
-{ uint64_t x79 = (x77 + x59);
-{ uint64_t x80 = (x79 >> 0x1a);
-{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
-{ uint64_t x82 = (x80 + x58);
-{ uint64_t x83 = (x82 >> 0x1a);
-{ uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
-{ uint64_t x85 = (x83 + x57);
-{ uint64_t x86 = (x85 >> 0x1a);
-{ uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
-{ uint64_t x88 = (x86 + x56);
-{ uint64_t x89 = (x88 >> 0x1a);
-{ uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
-{ uint64_t x91 = (x89 + x55);
-{ uint64_t x92 = (x91 >> 0x1a);
-{ uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
-{ uint64_t x94 = (x92 + x54);
-{ uint64_t x95 = (x94 >> 0x1a);
-{ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
-{ uint64_t x97 = (x95 + x53);
-{ uint64_t x98 = (x97 >> 0x1a);
-{ uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
-{ uint64_t x100 = (x98 + x52);
-{ uint64_t x101 = (x100 >> 0x1a);
-{ uint32_t x102 = ((uint32_t)x100 & 0x3ffffff);
-{ uint64_t x103 = (x66 + (0xf * x101));
-{ uint32_t x104 = (uint32_t) (x103 >> 0x1a);
-{ uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
-{ uint32_t x106 = (x104 + x69);
-{ uint32_t x107 = (x106 >> 0x1a);
-{ uint32_t x108 = (x106 & 0x3ffffff);
-out[0] = x102;
-out[1] = x99;
-out[2] = x96;
-out[3] = x93;
-out[4] = x90;
-out[5] = x87;
-out[6] = x84;
-out[7] = x81;
-out[8] = x78;
-out[9] = x75;
-out[10] = x107 + x72;
-out[11] = x108;
-out[12] = x105;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void femul(uint32_t out[13], const uint32_t in1[13], const uint32_t in2[13]) { /* Multiplies the 13-limb values in1 and in2 limb-wise and writes 13 partially carried limbs to out. NOTE(review): path name solinas32_2e338m15 suggests arithmetic mod 2^338 - 15 (13 limbs x 26 bits) -- confirm. */
+ { const uint32_t x26 = in1[12]; /* x26, x27, x25, ..., x5 hold in1[12] down to in1[0] */
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x50 = in2[12]; /* x50, x51, x49, ..., x29 hold in2[12] down to in2[0] */
+ { const uint32_t x51 = in2[11];
+ { const uint32_t x49 = in2[10];
+ { const uint32_t x47 = in2[9];
+ { const uint32_t x45 = in2[8];
+ { const uint32_t x43 = in2[7];
+ { const uint32_t x41 = in2[6];
+ { const uint32_t x39 = in2[5];
+ { const uint32_t x37 = in2[4];
+ { const uint32_t x35 = in2[3];
+ { const uint32_t x33 = in2[2];
+ { const uint32_t x31 = in2[1];
+ { const uint32_t x29 = in2[0];
+ { uint64_t x52 = (((uint64_t)x5 * x50) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + (((uint64_t)x27 * x31) + ((uint64_t)x26 * x29))))))))))))); /* column 12: all products in1[i]*in2[j] with i + j == 12; no 0xf term */
+ { uint64_t x53 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + (((uint64_t)x25 * x31) + ((uint64_t)x27 * x29)))))))))))) + (0xf * ((uint64_t)x26 * x50))); /* column 11: products with i + j == 11, plus 0xf times the product with i + j == 24 */
+ { uint64_t x54 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((uint64_t)x25 * x29))))))))))) + (0xf * (((uint64_t)x27 * x50) + ((uint64_t)x26 * x51))));
+ { uint64_t x55 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x23 * x29)))))))))) + (0xf * (((uint64_t)x25 * x50) + (((uint64_t)x27 * x51) + ((uint64_t)x26 * x49)))));
+ { uint64_t x56 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x21 * x29))))))))) + (0xf * (((uint64_t)x23 * x50) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + ((uint64_t)x26 * x47))))));
+ { uint64_t x57 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((uint64_t)x19 * x29)))))))) + (0xf * (((uint64_t)x21 * x50) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + ((uint64_t)x26 * x45)))))));
+ { uint64_t x58 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + ((uint64_t)x17 * x29))))))) + (0xf * (((uint64_t)x19 * x50) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + ((uint64_t)x26 * x43))))))));
+ { uint64_t x59 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((uint64_t)x15 * x29)))))) + (0xf * (((uint64_t)x17 * x50) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + ((uint64_t)x26 * x41)))))))));
+ { uint64_t x60 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((uint64_t)x13 * x29))))) + (0xf * (((uint64_t)x15 * x50) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + ((uint64_t)x26 * x39))))))))));
+ { uint64_t x61 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + ((uint64_t)x11 * x29)))) + (0xf * (((uint64_t)x13 * x50) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + ((uint64_t)x26 * x37)))))))))));
+ { uint64_t x62 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + ((uint64_t)x9 * x29))) + (0xf * (((uint64_t)x11 * x50) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + ((uint64_t)x26 * x35))))))))))));
+ { uint64_t x63 = ((((uint64_t)x5 * x31) + ((uint64_t)x7 * x29)) + (0xf * (((uint64_t)x9 * x50) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + ((uint64_t)x26 * x33)))))))))))));
+ { uint64_t x64 = (((uint64_t)x5 * x29) + (0xf * (((uint64_t)x7 * x50) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + (((uint64_t)x27 * x33) + ((uint64_t)x26 * x31)))))))))))))); /* column 0: in1[0]*in2[0] plus 0xf times the products with i + j == 13. NOTE(review): assumes input limbs are small enough that these 64-bit sums cannot wrap -- confirm bounds */
+ { uint64_t x65 = (x64 >> 0x1a); /* carry out of column 0: everything above the low 26 bits (0x1a == 26) */
+ { uint32_t x66 = ((uint32_t)x64 & 0x3ffffff); /* low 26 bits of column 0 */
+ { uint64_t x67 = (x65 + x63); /* same carry/mask step repeats for columns 1 through 12 */
+ { uint64_t x68 = (x67 >> 0x1a);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
+ { uint64_t x70 = (x68 + x62);
+ { uint64_t x71 = (x70 >> 0x1a);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
+ { uint64_t x73 = (x71 + x61);
+ { uint64_t x74 = (x73 >> 0x1a);
+ { uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
+ { uint64_t x76 = (x74 + x60);
+ { uint64_t x77 = (x76 >> 0x1a);
+ { uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
+ { uint64_t x79 = (x77 + x59);
+ { uint64_t x80 = (x79 >> 0x1a);
+ { uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+ { uint64_t x82 = (x80 + x58);
+ { uint64_t x83 = (x82 >> 0x1a);
+ { uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
+ { uint64_t x85 = (x83 + x57);
+ { uint64_t x86 = (x85 >> 0x1a);
+ { uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
+ { uint64_t x88 = (x86 + x56);
+ { uint64_t x89 = (x88 >> 0x1a);
+ { uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
+ { uint64_t x91 = (x89 + x55);
+ { uint64_t x92 = (x91 >> 0x1a);
+ { uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
+ { uint64_t x94 = (x92 + x54);
+ { uint64_t x95 = (x94 >> 0x1a);
+ { uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+ { uint64_t x97 = (x95 + x53);
+ { uint64_t x98 = (x97 >> 0x1a);
+ { uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
+ { uint64_t x100 = (x98 + x52);
+ { uint64_t x101 = (x100 >> 0x1a); /* carry out of the top column (12) */
+ { uint32_t x102 = ((uint32_t)x100 & 0x3ffffff);
+ { uint64_t x103 = (x66 + (0xf * x101)); /* top carry is folded back into limb 0, scaled by 0xf */
+ { uint32_t x104 = (uint32_t) (x103 >> 0x1a); /* carry from the folded limb 0, narrowed to 32 bits */
+ { uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
+ { uint32_t x106 = (x104 + x69); /* second pass: push that carry into limb 1 */
+ { uint32_t x107 = (x106 >> 0x1a);
+ { uint32_t x108 = (x106 & 0x3ffffff);
+ out[0] = x105; /* out[k] receives the limb for column k */
+ out[1] = x108;
+ out[2] = (x107 + x72); /* carry from limb 1 is added without re-masking, so this limb is not reduced to 26 bits here */
+ out[3] = x75;
+ out[4] = x78;
+ out[5] = x81;
+ out[6] = x84;
+ out[7] = x87;
+ out[8] = x90;
+ out[9] = x93;
+ out[10] = x96;
+ out[11] = x99;
+ out[12] = x102;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e338m15/fesquare.c b/src/Specific/solinas32_2e338m15/fesquare.c
index d5ee7aa1d..667230a95 100644
--- a/src/Specific/solinas32_2e338m15/fesquare.c
+++ b/src/Specific/solinas32_2e338m15/fesquare.c
@@ -1,91 +1,86 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x25 = (((uint64_t)x2 * x23) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x23 * x2)))))))))))));
-{ uint64_t x26 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0xf * ((uint64_t)x23 * x23)));
-{ uint64_t x27 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0xf * (((uint64_t)x24 * x23) + ((uint64_t)x23 * x24))));
-{ uint64_t x28 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0xf * (((uint64_t)x22 * x23) + (((uint64_t)x24 * x24) + ((uint64_t)x23 * x22)))));
-{ uint64_t x29 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0xf * (((uint64_t)x20 * x23) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + ((uint64_t)x23 * x20))))));
-{ uint64_t x30 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0xf * (((uint64_t)x18 * x23) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x23 * x18)))))));
-{ uint64_t x31 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0xf * (((uint64_t)x16 * x23) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + ((uint64_t)x23 * x16))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0xf * (((uint64_t)x14 * x23) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + ((uint64_t)x23 * x14)))))))));
-{ uint64_t x33 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xf * (((uint64_t)x12 * x23) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + ((uint64_t)x23 * x12))))))))));
-{ uint64_t x34 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0xf * (((uint64_t)x10 * x23) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + ((uint64_t)x23 * x10)))))))))));
-{ uint64_t x35 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x23) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + ((uint64_t)x23 * x8))))))))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x23) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + ((uint64_t)x23 * x6)))))))))))));
-{ uint64_t x37 = (((uint64_t)x2 * x2) + (0xf * (((uint64_t)x4 * x23) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + ((uint64_t)x23 * x4))))))))))))));
-{ uint64_t x38 = (x37 >> 0x1a);
-{ uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
-{ uint64_t x40 = (x38 + x36);
-{ uint64_t x41 = (x40 >> 0x1a);
-{ uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
-{ uint64_t x43 = (x41 + x35);
-{ uint64_t x44 = (x43 >> 0x1a);
-{ uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
-{ uint64_t x46 = (x44 + x34);
-{ uint64_t x47 = (x46 >> 0x1a);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
-{ uint64_t x49 = (x47 + x33);
-{ uint64_t x50 = (x49 >> 0x1a);
-{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
-{ uint64_t x52 = (x50 + x32);
-{ uint64_t x53 = (x52 >> 0x1a);
-{ uint32_t x54 = ((uint32_t)x52 & 0x3ffffff);
-{ uint64_t x55 = (x53 + x31);
-{ uint64_t x56 = (x55 >> 0x1a);
-{ uint32_t x57 = ((uint32_t)x55 & 0x3ffffff);
-{ uint64_t x58 = (x56 + x30);
-{ uint64_t x59 = (x58 >> 0x1a);
-{ uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
-{ uint64_t x61 = (x59 + x29);
-{ uint64_t x62 = (x61 >> 0x1a);
-{ uint32_t x63 = ((uint32_t)x61 & 0x3ffffff);
-{ uint64_t x64 = (x62 + x28);
-{ uint64_t x65 = (x64 >> 0x1a);
-{ uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
-{ uint64_t x67 = (x65 + x27);
-{ uint64_t x68 = (x67 >> 0x1a);
-{ uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
-{ uint64_t x70 = (x68 + x26);
-{ uint64_t x71 = (x70 >> 0x1a);
-{ uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
-{ uint64_t x73 = (x71 + x25);
-{ uint64_t x74 = (x73 >> 0x1a);
-{ uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
-{ uint64_t x76 = (x39 + (0xf * x74));
-{ uint32_t x77 = (uint32_t) (x76 >> 0x1a);
-{ uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
-{ uint32_t x79 = (x77 + x42);
-{ uint32_t x80 = (x79 >> 0x1a);
-{ uint32_t x81 = (x79 & 0x3ffffff);
-out[0] = x75;
-out[1] = x72;
-out[2] = x69;
-out[3] = x66;
-out[4] = x63;
-out[5] = x60;
-out[6] = x57;
-out[7] = x54;
-out[8] = x51;
-out[9] = x48;
-out[10] = x80 + x45;
-out[11] = x81;
-out[12] = x78;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[13];
+static void fesquare(uint32_t out[13], const uint32_t in1[13]) { /* Squares the 13-limb value in1 limb-wise and writes 13 partially carried limbs to out. NOTE(review): path name solinas32_2e338m15 suggests arithmetic mod 2^338 - 15 (13 limbs x 26 bits) -- confirm. */
+ { const uint32_t x23 = in1[12]; /* x23, x24, x22, ..., x2 hold in1[12] down to in1[0] */
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x25 = (((uint64_t)x2 * x23) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x23 * x2))))))))))))); /* column 12: all products in1[i]*in1[j] with i + j == 12; symmetric pairs are written out twice rather than doubled */
+ { uint64_t x26 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0xf * ((uint64_t)x23 * x23))); /* column 11: products with i + j == 11, plus 0xf times the product with i + j == 24 */
+ { uint64_t x27 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0xf * (((uint64_t)x24 * x23) + ((uint64_t)x23 * x24))));
+ { uint64_t x28 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0xf * (((uint64_t)x22 * x23) + (((uint64_t)x24 * x24) + ((uint64_t)x23 * x22)))));
+ { uint64_t x29 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0xf * (((uint64_t)x20 * x23) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + ((uint64_t)x23 * x20))))));
+ { uint64_t x30 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0xf * (((uint64_t)x18 * x23) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x23 * x18)))))));
+ { uint64_t x31 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0xf * (((uint64_t)x16 * x23) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + ((uint64_t)x23 * x16))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0xf * (((uint64_t)x14 * x23) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + ((uint64_t)x23 * x14)))))))));
+ { uint64_t x33 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xf * (((uint64_t)x12 * x23) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + ((uint64_t)x23 * x12))))))))));
+ { uint64_t x34 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0xf * (((uint64_t)x10 * x23) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + ((uint64_t)x23 * x10)))))))))));
+ { uint64_t x35 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x23) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + ((uint64_t)x23 * x8))))))))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x23) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + ((uint64_t)x23 * x6)))))))))))));
+ { uint64_t x37 = (((uint64_t)x2 * x2) + (0xf * (((uint64_t)x4 * x23) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + ((uint64_t)x23 * x4)))))))))))))); /* column 0: in1[0]*in1[0] plus 0xf times the products with i + j == 13. NOTE(review): assumes input limbs are small enough that these 64-bit sums cannot wrap -- confirm bounds */
+ { uint64_t x38 = (x37 >> 0x1a); /* carry out of column 0: everything above the low 26 bits (0x1a == 26) */
+ { uint32_t x39 = ((uint32_t)x37 & 0x3ffffff); /* low 26 bits of column 0 */
+ { uint64_t x40 = (x38 + x36); /* same carry/mask step repeats for columns 1 through 12 */
+ { uint64_t x41 = (x40 >> 0x1a);
+ { uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
+ { uint64_t x43 = (x41 + x35);
+ { uint64_t x44 = (x43 >> 0x1a);
+ { uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
+ { uint64_t x46 = (x44 + x34);
+ { uint64_t x47 = (x46 >> 0x1a);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+ { uint64_t x49 = (x47 + x33);
+ { uint64_t x50 = (x49 >> 0x1a);
+ { uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+ { uint64_t x52 = (x50 + x32);
+ { uint64_t x53 = (x52 >> 0x1a);
+ { uint32_t x54 = ((uint32_t)x52 & 0x3ffffff);
+ { uint64_t x55 = (x53 + x31);
+ { uint64_t x56 = (x55 >> 0x1a);
+ { uint32_t x57 = ((uint32_t)x55 & 0x3ffffff);
+ { uint64_t x58 = (x56 + x30);
+ { uint64_t x59 = (x58 >> 0x1a);
+ { uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
+ { uint64_t x61 = (x59 + x29);
+ { uint64_t x62 = (x61 >> 0x1a);
+ { uint32_t x63 = ((uint32_t)x61 & 0x3ffffff);
+ { uint64_t x64 = (x62 + x28);
+ { uint64_t x65 = (x64 >> 0x1a);
+ { uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
+ { uint64_t x67 = (x65 + x27);
+ { uint64_t x68 = (x67 >> 0x1a);
+ { uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
+ { uint64_t x70 = (x68 + x26);
+ { uint64_t x71 = (x70 >> 0x1a);
+ { uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
+ { uint64_t x73 = (x71 + x25);
+ { uint64_t x74 = (x73 >> 0x1a); /* carry out of the top column (12) */
+ { uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
+ { uint64_t x76 = (x39 + (0xf * x74)); /* top carry is folded back into limb 0, scaled by 0xf */
+ { uint32_t x77 = (uint32_t) (x76 >> 0x1a); /* carry from the folded limb 0, narrowed to 32 bits */
+ { uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
+ { uint32_t x79 = (x77 + x42); /* second pass: push that carry into limb 1 */
+ { uint32_t x80 = (x79 >> 0x1a);
+ { uint32_t x81 = (x79 & 0x3ffffff);
+ out[0] = x78; /* out[k] receives the limb for column k */
+ out[1] = x81;
+ out[2] = (x80 + x45); /* carry from limb 1 is added without re-masking, so this limb is not reduced to 26 bits here */
+ out[3] = x48;
+ out[4] = x51;
+ out[5] = x54;
+ out[6] = x57;
+ out[7] = x60;
+ out[8] = x63;
+ out[9] = x66;
+ out[10] = x69;
+ out[11] = x72;
+ out[12] = x75;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e338m15/freeze.c b/src/Specific/solinas32_2e338m15/freeze.c
index 80856537d..adc3007f2 100644
--- a/src/Specific/solinas32_2e338m15/freeze.c
+++ b/src/Specific/solinas32_2e338m15/freeze.c
@@ -1,25 +1,69 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x26;
-out[1] = uint8_t x27 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffff1;;
+static void freeze(uint32_t out[13], const uint32_t in1[13]) {
+ { const uint32_t x23 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffff1);
+ { uint32_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x27, Return x4, 0x3ffffff);
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x30, Return x6, 0x3ffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x8, 0x3ffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x10, 0x3ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x12, 0x3ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x14, 0x3ffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x16, 0x3ffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x18, 0x3ffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x20, 0x3ffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x22, 0x3ffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x24, 0x3ffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x23, 0x3ffffff);
+ { uint32_t x64 = (uint32_t)cmovznz(x63, 0x0, 0xffffffff);
+ { uint32_t x65 = (x64 & 0x3fffff1);
+ { uint32_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x26, Return x65);
+ { uint32_t x69 = (x64 & 0x3ffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x68, Return x29, Return x69);
+ { uint32_t x73 = (x64 & 0x3ffffff);
+ { uint32_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x32, Return x73);
+ { uint32_t x77 = (x64 & 0x3ffffff);
+ { uint32_t x79, uint8_t x80 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x35, Return x77);
+ { uint32_t x81 = (x64 & 0x3ffffff);
+ { uint32_t x83, uint8_t x84 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x80, Return x38, Return x81);
+ { uint32_t x85 = (x64 & 0x3ffffff);
+ { uint32_t x87, uint8_t x88 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x84, Return x41, Return x85);
+ { uint32_t x89 = (x64 & 0x3ffffff);
+ { uint32_t x91, uint8_t x92 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x88, Return x44, Return x89);
+ { uint32_t x93 = (x64 & 0x3ffffff);
+ { uint32_t x95, uint8_t x96 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x92, Return x47, Return x93);
+ { uint32_t x97 = (x64 & 0x3ffffff);
+ { uint32_t x99, uint8_t x100 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x96, Return x50, Return x97);
+ { uint32_t x101 = (x64 & 0x3ffffff);
+ { uint32_t x103, uint8_t x104 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x100, Return x53, Return x101);
+ { uint32_t x105 = (x64 & 0x3ffffff);
+ { uint32_t x107, uint8_t x108 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x104, Return x56, Return x105);
+ { uint32_t x109 = (x64 & 0x3ffffff);
+ { uint32_t x111, uint8_t x112 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x108, Return x59, Return x109);
+ { uint32_t x113 = (x64 & 0x3ffffff);
+ { uint32_t x115, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x112, Return x62, Return x113);
+ out[0] = x67;
+ out[1] = x71;
+ out[2] = x75;
+ out[3] = x79;
+ out[4] = x83;
+ out[5] = x87;
+ out[6] = x91;
+ out[7] = x95;
+ out[8] = x99;
+ out[9] = x103;
+ out[10] = x107;
+ out[11] = x111;
+ out[12] = x115;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e369m25/femul.c b/src/Specific/solinas32_2e369m25/femul.c
index 9d097727b..cc1c2a4b5 100644
--- a/src/Specific/solinas32_2e369m25/femul.c
+++ b/src/Specific/solinas32_2e369m25/femul.c
@@ -1,106 +1,120 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
-{ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x19 * ((uint64_t)x32 * x62)));
-{ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x19 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
-{ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x19 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
-{ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x19 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
-{ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x19 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
-{ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x19 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
-{ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x19 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
-{ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x19 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
-{ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x19 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
-{ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x19 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
-{ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x19 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
-{ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x19 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
-{ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x19 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
-{ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x19 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
-{ uint64_t x79 = (((uint64_t)x5 * x35) + (0x19 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
-{ uint64_t x80 = (x79 >> 0x18);
-{ uint32_t x81 = ((uint32_t)x79 & 0xffffff);
-{ uint64_t x82 = (x80 + x78);
-{ uint64_t x83 = (x82 >> 0x17);
-{ uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
-{ uint64_t x85 = (x83 + x77);
-{ uint64_t x86 = (x85 >> 0x17);
-{ uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
-{ uint64_t x88 = (x86 + x76);
-{ uint64_t x89 = (x88 >> 0x17);
-{ uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
-{ uint64_t x91 = (x89 + x75);
-{ uint64_t x92 = (x91 >> 0x17);
-{ uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
-{ uint64_t x94 = (x92 + x74);
-{ uint64_t x95 = (x94 >> 0x17);
-{ uint32_t x96 = ((uint32_t)x94 & 0x7fffff);
-{ uint64_t x97 = (x95 + x73);
-{ uint64_t x98 = (x97 >> 0x17);
-{ uint32_t x99 = ((uint32_t)x97 & 0x7fffff);
-{ uint64_t x100 = (x98 + x72);
-{ uint64_t x101 = (x100 >> 0x17);
-{ uint32_t x102 = ((uint32_t)x100 & 0x7fffff);
-{ uint64_t x103 = (x101 + x71);
-{ uint64_t x104 = (x103 >> 0x17);
-{ uint32_t x105 = ((uint32_t)x103 & 0x7fffff);
-{ uint64_t x106 = (x104 + x70);
-{ uint64_t x107 = (x106 >> 0x17);
-{ uint32_t x108 = ((uint32_t)x106 & 0x7fffff);
-{ uint64_t x109 = (x107 + x69);
-{ uint64_t x110 = (x109 >> 0x17);
-{ uint32_t x111 = ((uint32_t)x109 & 0x7fffff);
-{ uint64_t x112 = (x110 + x68);
-{ uint64_t x113 = (x112 >> 0x17);
-{ uint32_t x114 = ((uint32_t)x112 & 0x7fffff);
-{ uint64_t x115 = (x113 + x67);
-{ uint64_t x116 = (x115 >> 0x17);
-{ uint32_t x117 = ((uint32_t)x115 & 0x7fffff);
-{ uint64_t x118 = (x116 + x66);
-{ uint64_t x119 = (x118 >> 0x17);
-{ uint32_t x120 = ((uint32_t)x118 & 0x7fffff);
-{ uint64_t x121 = (x119 + x65);
-{ uint64_t x122 = (x121 >> 0x17);
-{ uint32_t x123 = ((uint32_t)x121 & 0x7fffff);
-{ uint64_t x124 = (x122 + x64);
-{ uint32_t x125 = (uint32_t) (x124 >> 0x17);
-{ uint32_t x126 = ((uint32_t)x124 & 0x7fffff);
-{ uint64_t x127 = (x81 + ((uint64_t)0x19 * x125));
-{ uint32_t x128 = (uint32_t) (x127 >> 0x18);
-{ uint32_t x129 = ((uint32_t)x127 & 0xffffff);
-{ uint32_t x130 = (x128 + x84);
-{ uint32_t x131 = (x130 >> 0x17);
-{ uint32_t x132 = (x130 & 0x7fffff);
-out[0] = x126;
-out[1] = x123;
-out[2] = x120;
-out[3] = x117;
-out[4] = x114;
-out[5] = x111;
-out[6] = x108;
-out[7] = x105;
-out[8] = x102;
-out[9] = x99;
-out[10] = x96;
-out[11] = x93;
-out[12] = x90;
-out[13] = x131 + x87;
-out[14] = x132;
-out[15] = x129;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ { uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x19 * ((uint64_t)x32 * x62)));
+ { uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x19 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ { uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x19 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+ { uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x19 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+ { uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x19 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ { uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x19 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ { uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x19 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ { uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x19 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ { uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x19 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ { uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x19 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ { uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x19 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+ { uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x19 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+ { uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x19 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ { uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x19 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ { uint64_t x79 = (((uint64_t)x5 * x35) + (0x19 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ { uint64_t x80 = (x79 >> 0x18);
+ { uint32_t x81 = ((uint32_t)x79 & 0xffffff);
+ { uint64_t x82 = (x80 + x78);
+ { uint64_t x83 = (x82 >> 0x17);
+ { uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
+ { uint64_t x85 = (x83 + x77);
+ { uint64_t x86 = (x85 >> 0x17);
+ { uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
+ { uint64_t x88 = (x86 + x76);
+ { uint64_t x89 = (x88 >> 0x17);
+ { uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
+ { uint64_t x91 = (x89 + x75);
+ { uint64_t x92 = (x91 >> 0x17);
+ { uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
+ { uint64_t x94 = (x92 + x74);
+ { uint64_t x95 = (x94 >> 0x17);
+ { uint32_t x96 = ((uint32_t)x94 & 0x7fffff);
+ { uint64_t x97 = (x95 + x73);
+ { uint64_t x98 = (x97 >> 0x17);
+ { uint32_t x99 = ((uint32_t)x97 & 0x7fffff);
+ { uint64_t x100 = (x98 + x72);
+ { uint64_t x101 = (x100 >> 0x17);
+ { uint32_t x102 = ((uint32_t)x100 & 0x7fffff);
+ { uint64_t x103 = (x101 + x71);
+ { uint64_t x104 = (x103 >> 0x17);
+ { uint32_t x105 = ((uint32_t)x103 & 0x7fffff);
+ { uint64_t x106 = (x104 + x70);
+ { uint64_t x107 = (x106 >> 0x17);
+ { uint32_t x108 = ((uint32_t)x106 & 0x7fffff);
+ { uint64_t x109 = (x107 + x69);
+ { uint64_t x110 = (x109 >> 0x17);
+ { uint32_t x111 = ((uint32_t)x109 & 0x7fffff);
+ { uint64_t x112 = (x110 + x68);
+ { uint64_t x113 = (x112 >> 0x17);
+ { uint32_t x114 = ((uint32_t)x112 & 0x7fffff);
+ { uint64_t x115 = (x113 + x67);
+ { uint64_t x116 = (x115 >> 0x17);
+ { uint32_t x117 = ((uint32_t)x115 & 0x7fffff);
+ { uint64_t x118 = (x116 + x66);
+ { uint64_t x119 = (x118 >> 0x17);
+ { uint32_t x120 = ((uint32_t)x118 & 0x7fffff);
+ { uint64_t x121 = (x119 + x65);
+ { uint64_t x122 = (x121 >> 0x17);
+ { uint32_t x123 = ((uint32_t)x121 & 0x7fffff);
+ { uint64_t x124 = (x122 + x64);
+ { uint32_t x125 = (uint32_t) (x124 >> 0x17);
+ { uint32_t x126 = ((uint32_t)x124 & 0x7fffff);
+ { uint64_t x127 = (x81 + ((uint64_t)0x19 * x125));
+ { uint32_t x128 = (uint32_t) (x127 >> 0x18);
+ { uint32_t x129 = ((uint32_t)x127 & 0xffffff);
+ { uint32_t x130 = (x128 + x84);
+ { uint32_t x131 = (x130 >> 0x17);
+ { uint32_t x132 = (x130 & 0x7fffff);
+ out[0] = x129;
+ out[1] = x132;
+ out[2] = (x131 + x87);
+ out[3] = x90;
+ out[4] = x93;
+ out[5] = x96;
+ out[6] = x99;
+ out[7] = x102;
+ out[8] = x105;
+ out[9] = x108;
+ out[10] = x111;
+ out[11] = x114;
+ out[12] = x117;
+ out[13] = x120;
+ out[14] = x123;
+ out[15] = x126;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e369m25/fesquare.c b/src/Specific/solinas32_2e369m25/fesquare.c
index d04a598ec..dc12ffc32 100644
--- a/src/Specific/solinas32_2e369m25/fesquare.c
+++ b/src/Specific/solinas32_2e369m25/fesquare.c
@@ -1,106 +1,104 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x19 * ((uint64_t)x29 * x29)));
-{ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x19 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
-{ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x19 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
-{ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x19 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x19 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
-{ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x19 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
-{ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x19 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
-{ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x19 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
-{ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
-{ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
-{ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
-{ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
-{ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
-{ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
-{ uint64_t x46 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
-{ uint64_t x47 = (x46 >> 0x18);
-{ uint32_t x48 = ((uint32_t)x46 & 0xffffff);
-{ uint64_t x49 = (x47 + x45);
-{ uint64_t x50 = (x49 >> 0x17);
-{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
-{ uint64_t x52 = (x50 + x44);
-{ uint64_t x53 = (x52 >> 0x17);
-{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
-{ uint64_t x55 = (x53 + x43);
-{ uint64_t x56 = (x55 >> 0x17);
-{ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
-{ uint64_t x58 = (x56 + x42);
-{ uint64_t x59 = (x58 >> 0x17);
-{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
-{ uint64_t x61 = (x59 + x41);
-{ uint64_t x62 = (x61 >> 0x17);
-{ uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
-{ uint64_t x64 = (x62 + x40);
-{ uint64_t x65 = (x64 >> 0x17);
-{ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
-{ uint64_t x67 = (x65 + x39);
-{ uint64_t x68 = (x67 >> 0x17);
-{ uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
-{ uint64_t x70 = (x68 + x38);
-{ uint64_t x71 = (x70 >> 0x17);
-{ uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
-{ uint64_t x73 = (x71 + x37);
-{ uint64_t x74 = (x73 >> 0x17);
-{ uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
-{ uint64_t x76 = (x74 + x36);
-{ uint64_t x77 = (x76 >> 0x17);
-{ uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
-{ uint64_t x79 = (x77 + x35);
-{ uint64_t x80 = (x79 >> 0x17);
-{ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
-{ uint64_t x82 = (x80 + x34);
-{ uint64_t x83 = (x82 >> 0x17);
-{ uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
-{ uint64_t x85 = (x83 + x33);
-{ uint64_t x86 = (x85 >> 0x17);
-{ uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
-{ uint64_t x88 = (x86 + x32);
-{ uint64_t x89 = (x88 >> 0x17);
-{ uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
-{ uint64_t x91 = (x89 + x31);
-{ uint32_t x92 = (uint32_t) (x91 >> 0x17);
-{ uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
-{ uint64_t x94 = (x48 + ((uint64_t)0x19 * x92));
-{ uint32_t x95 = (uint32_t) (x94 >> 0x18);
-{ uint32_t x96 = ((uint32_t)x94 & 0xffffff);
-{ uint32_t x97 = (x95 + x51);
-{ uint32_t x98 = (x97 >> 0x17);
-{ uint32_t x99 = (x97 & 0x7fffff);
-out[0] = x93;
-out[1] = x90;
-out[2] = x87;
-out[3] = x84;
-out[4] = x81;
-out[5] = x78;
-out[6] = x75;
-out[7] = x72;
-out[8] = x69;
-out[9] = x66;
-out[10] = x63;
-out[11] = x60;
-out[12] = x57;
-out[13] = x98 + x54;
-out[14] = x99;
-out[15] = x96;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x19 * ((uint64_t)x29 * x29)));
+ { uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x19 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ { uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x19 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+ { uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x19 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x19 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ { uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x19 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ { uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x19 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ { uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x19 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ { uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ { uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ { uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+ { uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+ { uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ { uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ { uint64_t x46 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+ { uint64_t x47 = (x46 >> 0x18);
+ { uint32_t x48 = ((uint32_t)x46 & 0xffffff);
+ { uint64_t x49 = (x47 + x45);
+ { uint64_t x50 = (x49 >> 0x17);
+ { uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+ { uint64_t x52 = (x50 + x44);
+ { uint64_t x53 = (x52 >> 0x17);
+ { uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+ { uint64_t x55 = (x53 + x43);
+ { uint64_t x56 = (x55 >> 0x17);
+ { uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+ { uint64_t x58 = (x56 + x42);
+ { uint64_t x59 = (x58 >> 0x17);
+ { uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+ { uint64_t x61 = (x59 + x41);
+ { uint64_t x62 = (x61 >> 0x17);
+ { uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
+ { uint64_t x64 = (x62 + x40);
+ { uint64_t x65 = (x64 >> 0x17);
+ { uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+ { uint64_t x67 = (x65 + x39);
+ { uint64_t x68 = (x67 >> 0x17);
+ { uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
+ { uint64_t x70 = (x68 + x38);
+ { uint64_t x71 = (x70 >> 0x17);
+ { uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
+ { uint64_t x73 = (x71 + x37);
+ { uint64_t x74 = (x73 >> 0x17);
+ { uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
+ { uint64_t x76 = (x74 + x36);
+ { uint64_t x77 = (x76 >> 0x17);
+ { uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
+ { uint64_t x79 = (x77 + x35);
+ { uint64_t x80 = (x79 >> 0x17);
+ { uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+ { uint64_t x82 = (x80 + x34);
+ { uint64_t x83 = (x82 >> 0x17);
+ { uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
+ { uint64_t x85 = (x83 + x33);
+ { uint64_t x86 = (x85 >> 0x17);
+ { uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
+ { uint64_t x88 = (x86 + x32);
+ { uint64_t x89 = (x88 >> 0x17);
+ { uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
+ { uint64_t x91 = (x89 + x31);
+ { uint32_t x92 = (uint32_t) (x91 >> 0x17);
+ { uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
+ { uint64_t x94 = (x48 + ((uint64_t)0x19 * x92));
+ { uint32_t x95 = (uint32_t) (x94 >> 0x18);
+ { uint32_t x96 = ((uint32_t)x94 & 0xffffff);
+ { uint32_t x97 = (x95 + x51);
+ { uint32_t x98 = (x97 >> 0x17);
+ { uint32_t x99 = (x97 & 0x7fffff);
+ out[0] = x96;
+ out[1] = x99;
+ out[2] = (x98 + x54);
+ out[3] = x57;
+ out[4] = x60;
+ out[5] = x63;
+ out[6] = x66;
+ out[7] = x69;
+ out[8] = x72;
+ out[9] = x75;
+ out[10] = x78;
+ out[11] = x81;
+ out[12] = x84;
+ out[13] = x87;
+ out[14] = x90;
+ out[15] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e369m25/freeze.c b/src/Specific/solinas32_2e369m25/freeze.c
index 581df79d9..15926bf76 100644
--- a/src/Specific/solinas32_2e369m25/freeze.c
+++ b/src/Specific/solinas32_2e369m25/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffe7;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffe7);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0x7fffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0x7fffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0x7fffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0x7fffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0x7fffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0x7fffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0x7fffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0x7fffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0x7fffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0x7fffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0x7fffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0x7fffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0x7fffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x7fffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x7fffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0xffffe7);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0x7fffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0x7fffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0x7fffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0x7fffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0x7fffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0x7fffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0x7fffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0x7fffff);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0x7fffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0x7fffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0x7fffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0x7fffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0x7fffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0x7fffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0x7fffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c
index 200b9760d..b11fdca42 100644
--- a/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c
+++ b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c
@@ -1,25 +1,82 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff);
+ { uint32_t x35, ℤ x36 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x33, Return x4, 0xff);
+ { uint32_t x38, ℤ x39 = Op (Syntax.SubWithGetBorrow 24 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x36, Return x6, 0x0);
+ { uint32_t x41, ℤ x42 = Op (Syntax.SubWithGetBorrow 24 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) Syntax.TZ) (Return x39, Return x8, 0x0);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 24 Syntax.TZ (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xfffeff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xffffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0xffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint8_t x84 = ((uint8_t)x79 & 0xff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, 0x0);
+ { uint32_t x92, uint8_t x93 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x41, 0x0);
+ { uint32_t x94 = (x79 & 0xffffff);
+ { uint32_t x96, uint8_t x97 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x93, Return x44, Return x94);
+ { uint32_t x98 = (x79 & 0xfffeff);
+ { uint32_t x100, uint8_t x101 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x97, Return x47, Return x98);
+ { uint32_t x102 = (x79 & 0xffffff);
+ { uint32_t x104, uint8_t x105 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x101, Return x50, Return x102);
+ { uint32_t x106 = (x79 & 0xffffff);
+ { uint32_t x108, uint8_t x109 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x105, Return x53, Return x106);
+ { uint32_t x110 = (x79 & 0xffffff);
+ { uint32_t x112, uint8_t x113 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x109, Return x56, Return x110);
+ { uint32_t x114 = (x79 & 0xffffff);
+ { uint32_t x116, uint8_t x117 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x113, Return x59, Return x114);
+ { uint32_t x118 = (x79 & 0xffffff);
+ { uint32_t x120, uint8_t x121 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x117, Return x62, Return x118);
+ { uint32_t x122 = (x79 & 0xffffff);
+ { uint32_t x124, uint8_t x125 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x121, Return x65, Return x122);
+ { uint32_t x126 = (x79 & 0xffffff);
+ { uint32_t x128, uint8_t x129 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x125, Return x68, Return x126);
+ { uint32_t x130 = (x79 & 0xffffff);
+ { uint32_t x132, uint8_t x133 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x129, Return x71, Return x130);
+ { uint32_t x134 = (x79 & 0xffffff);
+ { uint32_t x136, uint8_t x137 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x133, Return x74, Return x134);
+ { uint32_t x138 = (x79 & 0xffffff);
+ { uint32_t x140, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x137, Return x77, Return x138);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x89;
+ out[3] = x92;
+ out[4] = x96;
+ out[5] = x100;
+ out[6] = x104;
+ out[7] = x108;
+ out[8] = x112;
+ out[9] = x116;
+ out[10] = x120;
+ out[11] = x124;
+ out[12] = x128;
+ out[13] = x132;
+ out[14] = x136;
+ out[15] = x140;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e384m5x2e368m1/freeze.c b/src/Specific/solinas32_2e384m5x2e368m1/freeze.c
index 200b9760d..6e58bd525 100644
--- a/src/Specific/solinas32_2e384m5x2e368m1/freeze.c
+++ b/src/Specific/solinas32_2e384m5x2e368m1/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xfffaff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0xffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0xffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0xffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0xffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0xffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0xffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0xffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0xffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0xffffff);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0xffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0xffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0xffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0xffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0xffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0xffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0xfffaff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e384m79x2e376m1/freeze.c b/src/Specific/solinas32_2e384m79x2e376m1/freeze.c
index 200b9760d..3f29d8d5c 100644
--- a/src/Specific/solinas32_2e384m79x2e376m1/freeze.c
+++ b/src/Specific/solinas32_2e384m79x2e376m1/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffff;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xb0ffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0xffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0xffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0xffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0xffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0xffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0xffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0xffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0xffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0xffffff);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0xffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0xffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0xffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0xffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0xffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0xffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0xb0ffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e401m31/femul.c b/src/Specific/solinas32_2e401m31/femul.c
index df691305f..39cd5de4e 100644
--- a/src/Specific/solinas32_2e401m31/femul.c
+++ b/src/Specific/solinas32_2e401m31/femul.c
@@ -1,106 +1,120 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
-{ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x1f * ((uint64_t)x32 * x62)));
-{ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x1f * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
-{ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x1f * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
-{ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x1f * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
-{ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x1f * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
-{ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x1f * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
-{ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x1f * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
-{ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x1f * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
-{ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x1f * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
-{ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x1f * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
-{ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x1f * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
-{ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x1f * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
-{ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x1f * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
-{ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x1f * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
-{ uint64_t x79 = (((uint64_t)x5 * x35) + (0x1f * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
-{ uint64_t x80 = (x79 >> 0x1a);
-{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
-{ uint64_t x82 = (x80 + x78);
-{ uint64_t x83 = (x82 >> 0x19);
-{ uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
-{ uint64_t x85 = (x83 + x77);
-{ uint64_t x86 = (x85 >> 0x19);
-{ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
-{ uint64_t x88 = (x86 + x76);
-{ uint64_t x89 = (x88 >> 0x19);
-{ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
-{ uint64_t x91 = (x89 + x75);
-{ uint64_t x92 = (x91 >> 0x19);
-{ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
-{ uint64_t x94 = (x92 + x74);
-{ uint64_t x95 = (x94 >> 0x19);
-{ uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
-{ uint64_t x97 = (x95 + x73);
-{ uint64_t x98 = (x97 >> 0x19);
-{ uint32_t x99 = ((uint32_t)x97 & 0x1ffffff);
-{ uint64_t x100 = (x98 + x72);
-{ uint64_t x101 = (x100 >> 0x19);
-{ uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
-{ uint64_t x103 = (x101 + x71);
-{ uint64_t x104 = (x103 >> 0x19);
-{ uint32_t x105 = ((uint32_t)x103 & 0x1ffffff);
-{ uint64_t x106 = (x104 + x70);
-{ uint64_t x107 = (x106 >> 0x19);
-{ uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
-{ uint64_t x109 = (x107 + x69);
-{ uint64_t x110 = (x109 >> 0x19);
-{ uint32_t x111 = ((uint32_t)x109 & 0x1ffffff);
-{ uint64_t x112 = (x110 + x68);
-{ uint64_t x113 = (x112 >> 0x19);
-{ uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
-{ uint64_t x115 = (x113 + x67);
-{ uint64_t x116 = (x115 >> 0x19);
-{ uint32_t x117 = ((uint32_t)x115 & 0x1ffffff);
-{ uint64_t x118 = (x116 + x66);
-{ uint64_t x119 = (x118 >> 0x19);
-{ uint32_t x120 = ((uint32_t)x118 & 0x1ffffff);
-{ uint64_t x121 = (x119 + x65);
-{ uint64_t x122 = (x121 >> 0x19);
-{ uint32_t x123 = ((uint32_t)x121 & 0x1ffffff);
-{ uint64_t x124 = (x122 + x64);
-{ uint64_t x125 = (x124 >> 0x19);
-{ uint32_t x126 = ((uint32_t)x124 & 0x1ffffff);
-{ uint64_t x127 = (x81 + (0x1f * x125));
-{ uint32_t x128 = (uint32_t) (x127 >> 0x1a);
-{ uint32_t x129 = ((uint32_t)x127 & 0x3ffffff);
-{ uint32_t x130 = (x128 + x84);
-{ uint32_t x131 = (x130 >> 0x19);
-{ uint32_t x132 = (x130 & 0x1ffffff);
-out[0] = x126;
-out[1] = x123;
-out[2] = x120;
-out[3] = x117;
-out[4] = x114;
-out[5] = x111;
-out[6] = x108;
-out[7] = x105;
-out[8] = x102;
-out[9] = x99;
-out[10] = x96;
-out[11] = x93;
-out[12] = x90;
-out[13] = x131 + x87;
-out[14] = x132;
-out[15] = x129;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ { uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x1f * ((uint64_t)x32 * x62)));
+ { uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x1f * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ { uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x1f * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+ { uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x1f * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+ { uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x1f * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ { uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x1f * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ { uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x1f * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ { uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x1f * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ { uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x1f * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ { uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x1f * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ { uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x1f * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+ { uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x1f * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+ { uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x1f * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ { uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x1f * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ { uint64_t x79 = (((uint64_t)x5 * x35) + (0x1f * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ { uint64_t x80 = (x79 >> 0x1a);
+ { uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+ { uint64_t x82 = (x80 + x78);
+ { uint64_t x83 = (x82 >> 0x19);
+ { uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
+ { uint64_t x85 = (x83 + x77);
+ { uint64_t x86 = (x85 >> 0x19);
+ { uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+ { uint64_t x88 = (x86 + x76);
+ { uint64_t x89 = (x88 >> 0x19);
+ { uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+ { uint64_t x91 = (x89 + x75);
+ { uint64_t x92 = (x91 >> 0x19);
+ { uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+ { uint64_t x94 = (x92 + x74);
+ { uint64_t x95 = (x94 >> 0x19);
+ { uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
+ { uint64_t x97 = (x95 + x73);
+ { uint64_t x98 = (x97 >> 0x19);
+ { uint32_t x99 = ((uint32_t)x97 & 0x1ffffff);
+ { uint64_t x100 = (x98 + x72);
+ { uint64_t x101 = (x100 >> 0x19);
+ { uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
+ { uint64_t x103 = (x101 + x71);
+ { uint64_t x104 = (x103 >> 0x19);
+ { uint32_t x105 = ((uint32_t)x103 & 0x1ffffff);
+ { uint64_t x106 = (x104 + x70);
+ { uint64_t x107 = (x106 >> 0x19);
+ { uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
+ { uint64_t x109 = (x107 + x69);
+ { uint64_t x110 = (x109 >> 0x19);
+ { uint32_t x111 = ((uint32_t)x109 & 0x1ffffff);
+ { uint64_t x112 = (x110 + x68);
+ { uint64_t x113 = (x112 >> 0x19);
+ { uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
+ { uint64_t x115 = (x113 + x67);
+ { uint64_t x116 = (x115 >> 0x19);
+ { uint32_t x117 = ((uint32_t)x115 & 0x1ffffff);
+ { uint64_t x118 = (x116 + x66);
+ { uint64_t x119 = (x118 >> 0x19);
+ { uint32_t x120 = ((uint32_t)x118 & 0x1ffffff);
+ { uint64_t x121 = (x119 + x65);
+ { uint64_t x122 = (x121 >> 0x19);
+ { uint32_t x123 = ((uint32_t)x121 & 0x1ffffff);
+ { uint64_t x124 = (x122 + x64);
+ { uint64_t x125 = (x124 >> 0x19);
+ { uint32_t x126 = ((uint32_t)x124 & 0x1ffffff);
+ { uint64_t x127 = (x81 + (0x1f * x125));
+ { uint32_t x128 = (uint32_t) (x127 >> 0x1a);
+ { uint32_t x129 = ((uint32_t)x127 & 0x3ffffff);
+ { uint32_t x130 = (x128 + x84);
+ { uint32_t x131 = (x130 >> 0x19);
+ { uint32_t x132 = (x130 & 0x1ffffff);
+ out[0] = x129;
+ out[1] = x132;
+ out[2] = (x131 + x87);
+ out[3] = x90;
+ out[4] = x93;
+ out[5] = x96;
+ out[6] = x99;
+ out[7] = x102;
+ out[8] = x105;
+ out[9] = x108;
+ out[10] = x111;
+ out[11] = x114;
+ out[12] = x117;
+ out[13] = x120;
+ out[14] = x123;
+ out[15] = x126;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e401m31/fesquare.c b/src/Specific/solinas32_2e401m31/fesquare.c
index 5379438ee..580bcf5f5 100644
--- a/src/Specific/solinas32_2e401m31/fesquare.c
+++ b/src/Specific/solinas32_2e401m31/fesquare.c
@@ -1,106 +1,104 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
-{ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x1f * ((uint64_t)x29 * x29)));
-{ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x1f * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
-{ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x1f * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
-{ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x1f * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
-{ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x1f * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
-{ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x1f * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
-{ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x1f * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
-{ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x1f * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
-{ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1f * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
-{ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x1f * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
-{ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1f * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
-{ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1f * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
-{ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1f * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
-{ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1f * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
-{ uint64_t x46 = (((uint64_t)x2 * x2) + (0x1f * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
-{ uint64_t x47 = (x46 >> 0x1a);
-{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
-{ uint64_t x49 = (x47 + x45);
-{ uint64_t x50 = (x49 >> 0x19);
-{ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
-{ uint64_t x52 = (x50 + x44);
-{ uint64_t x53 = (x52 >> 0x19);
-{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
-{ uint64_t x55 = (x53 + x43);
-{ uint64_t x56 = (x55 >> 0x19);
-{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
-{ uint64_t x58 = (x56 + x42);
-{ uint64_t x59 = (x58 >> 0x19);
-{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
-{ uint64_t x61 = (x59 + x41);
-{ uint64_t x62 = (x61 >> 0x19);
-{ uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
-{ uint64_t x64 = (x62 + x40);
-{ uint64_t x65 = (x64 >> 0x19);
-{ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
-{ uint64_t x67 = (x65 + x39);
-{ uint64_t x68 = (x67 >> 0x19);
-{ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
-{ uint64_t x70 = (x68 + x38);
-{ uint64_t x71 = (x70 >> 0x19);
-{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
-{ uint64_t x73 = (x71 + x37);
-{ uint64_t x74 = (x73 >> 0x19);
-{ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
-{ uint64_t x76 = (x74 + x36);
-{ uint64_t x77 = (x76 >> 0x19);
-{ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
-{ uint64_t x79 = (x77 + x35);
-{ uint64_t x80 = (x79 >> 0x19);
-{ uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
-{ uint64_t x82 = (x80 + x34);
-{ uint64_t x83 = (x82 >> 0x19);
-{ uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
-{ uint64_t x85 = (x83 + x33);
-{ uint64_t x86 = (x85 >> 0x19);
-{ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
-{ uint64_t x88 = (x86 + x32);
-{ uint64_t x89 = (x88 >> 0x19);
-{ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
-{ uint64_t x91 = (x89 + x31);
-{ uint64_t x92 = (x91 >> 0x19);
-{ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
-{ uint64_t x94 = (x48 + (0x1f * x92));
-{ uint32_t x95 = (uint32_t) (x94 >> 0x1a);
-{ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
-{ uint32_t x97 = (x95 + x51);
-{ uint32_t x98 = (x97 >> 0x19);
-{ uint32_t x99 = (x97 & 0x1ffffff);
-out[0] = x93;
-out[1] = x90;
-out[2] = x87;
-out[3] = x84;
-out[4] = x81;
-out[5] = x78;
-out[6] = x75;
-out[7] = x72;
-out[8] = x69;
-out[9] = x66;
-out[10] = x63;
-out[11] = x60;
-out[12] = x57;
-out[13] = x98 + x54;
-out[14] = x99;
-out[15] = x96;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+ { uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x1f * ((uint64_t)x29 * x29)));
+ { uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x1f * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ { uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x1f * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+ { uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x1f * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+ { uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x1f * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ { uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x1f * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ { uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x1f * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ { uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x1f * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ { uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1f * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ { uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x1f * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ { uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1f * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+ { uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1f * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+ { uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1f * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ { uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1f * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ { uint64_t x46 = (((uint64_t)x2 * x2) + (0x1f * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+ { uint64_t x47 = (x46 >> 0x1a);
+ { uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+ { uint64_t x49 = (x47 + x45);
+ { uint64_t x50 = (x49 >> 0x19);
+ { uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+ { uint64_t x52 = (x50 + x44);
+ { uint64_t x53 = (x52 >> 0x19);
+ { uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+ { uint64_t x55 = (x53 + x43);
+ { uint64_t x56 = (x55 >> 0x19);
+ { uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+ { uint64_t x58 = (x56 + x42);
+ { uint64_t x59 = (x58 >> 0x19);
+ { uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+ { uint64_t x61 = (x59 + x41);
+ { uint64_t x62 = (x61 >> 0x19);
+ { uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
+ { uint64_t x64 = (x62 + x40);
+ { uint64_t x65 = (x64 >> 0x19);
+ { uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+ { uint64_t x67 = (x65 + x39);
+ { uint64_t x68 = (x67 >> 0x19);
+ { uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+ { uint64_t x70 = (x68 + x38);
+ { uint64_t x71 = (x70 >> 0x19);
+ { uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+ { uint64_t x73 = (x71 + x37);
+ { uint64_t x74 = (x73 >> 0x19);
+ { uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+ { uint64_t x76 = (x74 + x36);
+ { uint64_t x77 = (x76 >> 0x19);
+ { uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+ { uint64_t x79 = (x77 + x35);
+ { uint64_t x80 = (x79 >> 0x19);
+ { uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
+ { uint64_t x82 = (x80 + x34);
+ { uint64_t x83 = (x82 >> 0x19);
+ { uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
+ { uint64_t x85 = (x83 + x33);
+ { uint64_t x86 = (x85 >> 0x19);
+ { uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+ { uint64_t x88 = (x86 + x32);
+ { uint64_t x89 = (x88 >> 0x19);
+ { uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+ { uint64_t x91 = (x89 + x31);
+ { uint64_t x92 = (x91 >> 0x19);
+ { uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+ { uint64_t x94 = (x48 + (0x1f * x92));
+ { uint32_t x95 = (uint32_t) (x94 >> 0x1a);
+ { uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+ { uint32_t x97 = (x95 + x51);
+ { uint32_t x98 = (x97 >> 0x19);
+ { uint32_t x99 = (x97 & 0x1ffffff);
+ out[0] = x96;
+ out[1] = x99;
+ out[2] = (x98 + x54);
+ out[3] = x57;
+ out[4] = x60;
+ out[5] = x63;
+ out[6] = x66;
+ out[7] = x69;
+ out[8] = x72;
+ out[9] = x75;
+ out[10] = x78;
+ out[11] = x81;
+ out[12] = x84;
+ out[13] = x87;
+ out[14] = x90;
+ out[15] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e401m31/freeze.c b/src/Specific/solinas32_2e401m31/freeze.c
index d3f0fe91e..2bc686d96 100644
--- a/src/Specific/solinas32_2e401m31/freeze.c
+++ b/src/Specific/solinas32_2e401m31/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffe1;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffe1);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0x1ffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0x1ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0x1ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0x1ffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0x1ffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0x1ffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0x1ffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0x1ffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0x1ffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0x1ffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0x1ffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0x1ffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0x1ffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x1ffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x1ffffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0x3ffffe1);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0x1ffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0x1ffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0x1ffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0x1ffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0x1ffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0x1ffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0x1ffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0x1ffffff);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0x1ffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0x1ffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0x1ffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0x1ffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0x1ffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0x1ffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0x1ffffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e416m2e208m1/femul.c b/src/Specific/solinas32_2e416m2e208m1/femul.c
index 6a8378ba7..44254473b 100644
--- a/src/Specific/solinas32_2e416m2e208m1/femul.c
+++ b/src/Specific/solinas32_2e416m2e208m1/femul.c
@@ -1,131 +1,145 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49));
-{ uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
-{ uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
-{ uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
-{ uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
-{ uint64_t x69 = ((((uint64_t)(x9 + x25) * (x49 + x62)) + (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) - (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))));
-{ uint64_t x70 = ((((uint64_t)(x7 + x23) * (x49 + x62)) + (((uint64_t)(x9 + x25) * (x47 + x63)) + (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) - (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))));
-{ uint64_t x71 = ((((uint64_t)(x5 + x21) * (x49 + x62)) + (((uint64_t)(x7 + x23) * (x47 + x63)) + (((uint64_t)(x9 + x25) * (x45 + x61)) + (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) - (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))));
-{ uint64_t x72 = ((((uint64_t)(x5 + x21) * (x47 + x63)) + (((uint64_t)(x7 + x23) * (x45 + x61)) + (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) - (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))));
-{ uint64_t x73 = ((((uint64_t)(x5 + x21) * (x45 + x61)) + (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) - (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))));
-{ uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
-{ uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
-{ uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
-{ uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
-{ uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
-{ uint64_t x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) + x72) + x64);
-{ uint64_t x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) + x65);
-{ uint64_t x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) + x66);
-{ uint64_t x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) + x67);
-{ uint64_t x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) + x68);
-{ uint64_t x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) + x69);
-{ uint64_t x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) + x70);
-{ uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
-{ uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
-{ uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
-{ uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
-{ uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
-{ uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
-{ uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
-{ uint64_t x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) + x70);
-{ uint64_t x94 = (x86 >> 0x1a);
-{ uint32_t x95 = ((uint32_t)x86 & 0x3ffffff);
-{ uint64_t x96 = (x71 >> 0x1a);
-{ uint32_t x97 = ((uint32_t)x71 & 0x3ffffff);
-{ uint64_t x98 = ((0x4000000 * x96) + x97);
-{ uint64_t x99 = (x98 >> 0x1a);
-{ uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
-{ uint64_t x101 = ((x94 + x85) + x99);
-{ uint64_t x102 = (x101 >> 0x1a);
-{ uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
-{ uint64_t x104 = (x93 + x99);
-{ uint64_t x105 = (x104 >> 0x1a);
-{ uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
-{ uint64_t x107 = (x102 + x84);
-{ uint64_t x108 = (x107 >> 0x1a);
-{ uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
-{ uint64_t x110 = (x105 + x92);
-{ uint64_t x111 = (x110 >> 0x1a);
-{ uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
-{ uint64_t x113 = (x108 + x83);
-{ uint64_t x114 = (x113 >> 0x1a);
-{ uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
-{ uint64_t x116 = (x111 + x91);
-{ uint64_t x117 = (x116 >> 0x1a);
-{ uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
-{ uint64_t x119 = (x114 + x82);
-{ uint64_t x120 = (x119 >> 0x1a);
-{ uint32_t x121 = ((uint32_t)x119 & 0x3ffffff);
-{ uint64_t x122 = (x117 + x90);
-{ uint64_t x123 = (x122 >> 0x1a);
-{ uint32_t x124 = ((uint32_t)x122 & 0x3ffffff);
-{ uint64_t x125 = (x120 + x81);
-{ uint64_t x126 = (x125 >> 0x1a);
-{ uint32_t x127 = ((uint32_t)x125 & 0x3ffffff);
-{ uint64_t x128 = (x123 + x89);
-{ uint64_t x129 = (x128 >> 0x1a);
-{ uint32_t x130 = ((uint32_t)x128 & 0x3ffffff);
-{ uint64_t x131 = (x126 + x80);
-{ uint64_t x132 = (x131 >> 0x1a);
-{ uint32_t x133 = ((uint32_t)x131 & 0x3ffffff);
-{ uint64_t x134 = (x129 + x88);
-{ uint64_t x135 = (x134 >> 0x1a);
-{ uint32_t x136 = ((uint32_t)x134 & 0x3ffffff);
-{ uint64_t x137 = (x132 + x79);
-{ uint64_t x138 = (x137 >> 0x1a);
-{ uint32_t x139 = ((uint32_t)x137 & 0x3ffffff);
-{ uint64_t x140 = (x135 + x87);
-{ uint64_t x141 = (x140 >> 0x1a);
-{ uint32_t x142 = ((uint32_t)x140 & 0x3ffffff);
-{ uint64_t x143 = (x138 + x100);
-{ uint32_t x144 = (uint32_t) (x143 >> 0x1a);
-{ uint32_t x145 = ((uint32_t)x143 & 0x3ffffff);
-{ uint64_t x146 = (x141 + x95);
-{ uint32_t x147 = (uint32_t) (x146 >> 0x1a);
-{ uint32_t x148 = ((uint32_t)x146 & 0x3ffffff);
-{ uint64_t x149 = (((uint64_t)0x4000000 * x144) + x145);
-{ uint32_t x150 = (uint32_t) (x149 >> 0x1a);
-{ uint32_t x151 = ((uint32_t)x149 & 0x3ffffff);
-{ uint32_t x152 = ((x147 + x103) + x150);
-{ uint32_t x153 = (x152 >> 0x1a);
-{ uint32_t x154 = (x152 & 0x3ffffff);
-{ uint32_t x155 = (x106 + x150);
-{ uint32_t x156 = (x155 >> 0x1a);
-{ uint32_t x157 = (x155 & 0x3ffffff);
-out[0] = x151;
-out[1] = x139;
-out[2] = x133;
-out[3] = x127;
-out[4] = x121;
-out[5] = x115;
-out[6] = x153 + x109;
-out[7] = x154;
-out[8] = x148;
-out[9] = x142;
-out[10] = x136;
-out[11] = x130;
-out[12] = x124;
-out[13] = x118;
-out[14] = x156 + x112;
-out[15] = x157;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49));
+ { uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
+ { uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
+ { uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
+ { uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
+ { uint64_t x69 = ((((uint64_t)(x9 + x25) * (x49 + x62)) + (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) - (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))));
+ { uint64_t x70 = ((((uint64_t)(x7 + x23) * (x49 + x62)) + (((uint64_t)(x9 + x25) * (x47 + x63)) + (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) - (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))));
+ { uint64_t x71 = ((((uint64_t)(x5 + x21) * (x49 + x62)) + (((uint64_t)(x7 + x23) * (x47 + x63)) + (((uint64_t)(x9 + x25) * (x45 + x61)) + (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) - (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))));
+ { uint64_t x72 = ((((uint64_t)(x5 + x21) * (x47 + x63)) + (((uint64_t)(x7 + x23) * (x45 + x61)) + (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) - (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))));
+ { uint64_t x73 = ((((uint64_t)(x5 + x21) * (x45 + x61)) + (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) - (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))));
+ { uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
+ { uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
+ { uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
+ { uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
+ { uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
+ { uint64_t x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) + x72) + x64);
+ { uint64_t x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) + x65);
+ { uint64_t x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) + x66);
+ { uint64_t x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) + x67);
+ { uint64_t x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) + x68);
+ { uint64_t x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) + x69);
+ { uint64_t x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) + x70);
+ { uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
+ { uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
+ { uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
+ { uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
+ { uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
+ { uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
+ { uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
+ { uint64_t x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) + x70);
+ { uint64_t x94 = (x86 >> 0x1a);
+ { uint32_t x95 = ((uint32_t)x86 & 0x3ffffff);
+ { uint64_t x96 = (x71 >> 0x1a);
+ { uint32_t x97 = ((uint32_t)x71 & 0x3ffffff);
+ { uint64_t x98 = ((0x4000000 * x96) + x97);
+ { uint64_t x99 = (x98 >> 0x1a);
+ { uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
+ { uint64_t x101 = ((x94 + x85) + x99);
+ { uint64_t x102 = (x101 >> 0x1a);
+ { uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
+ { uint64_t x104 = (x93 + x99);
+ { uint64_t x105 = (x104 >> 0x1a);
+ { uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
+ { uint64_t x107 = (x102 + x84);
+ { uint64_t x108 = (x107 >> 0x1a);
+ { uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
+ { uint64_t x110 = (x105 + x92);
+ { uint64_t x111 = (x110 >> 0x1a);
+ { uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
+ { uint64_t x113 = (x108 + x83);
+ { uint64_t x114 = (x113 >> 0x1a);
+ { uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
+ { uint64_t x116 = (x111 + x91);
+ { uint64_t x117 = (x116 >> 0x1a);
+ { uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
+ { uint64_t x119 = (x114 + x82);
+ { uint64_t x120 = (x119 >> 0x1a);
+ { uint32_t x121 = ((uint32_t)x119 & 0x3ffffff);
+ { uint64_t x122 = (x117 + x90);
+ { uint64_t x123 = (x122 >> 0x1a);
+ { uint32_t x124 = ((uint32_t)x122 & 0x3ffffff);
+ { uint64_t x125 = (x120 + x81);
+ { uint64_t x126 = (x125 >> 0x1a);
+ { uint32_t x127 = ((uint32_t)x125 & 0x3ffffff);
+ { uint64_t x128 = (x123 + x89);
+ { uint64_t x129 = (x128 >> 0x1a);
+ { uint32_t x130 = ((uint32_t)x128 & 0x3ffffff);
+ { uint64_t x131 = (x126 + x80);
+ { uint64_t x132 = (x131 >> 0x1a);
+ { uint32_t x133 = ((uint32_t)x131 & 0x3ffffff);
+ { uint64_t x134 = (x129 + x88);
+ { uint64_t x135 = (x134 >> 0x1a);
+ { uint32_t x136 = ((uint32_t)x134 & 0x3ffffff);
+ { uint64_t x137 = (x132 + x79);
+ { uint64_t x138 = (x137 >> 0x1a);
+ { uint32_t x139 = ((uint32_t)x137 & 0x3ffffff);
+ { uint64_t x140 = (x135 + x87);
+ { uint64_t x141 = (x140 >> 0x1a);
+ { uint32_t x142 = ((uint32_t)x140 & 0x3ffffff);
+ { uint64_t x143 = (x138 + x100);
+ { uint32_t x144 = (uint32_t) (x143 >> 0x1a);
+ { uint32_t x145 = ((uint32_t)x143 & 0x3ffffff);
+ { uint64_t x146 = (x141 + x95);
+ { uint32_t x147 = (uint32_t) (x146 >> 0x1a);
+ { uint32_t x148 = ((uint32_t)x146 & 0x3ffffff);
+ { uint64_t x149 = (((uint64_t)0x4000000 * x144) + x145);
+ { uint32_t x150 = (uint32_t) (x149 >> 0x1a);
+ { uint32_t x151 = ((uint32_t)x149 & 0x3ffffff);
+ { uint32_t x152 = ((x147 + x103) + x150);
+ { uint32_t x153 = (x152 >> 0x1a);
+ { uint32_t x154 = (x152 & 0x3ffffff);
+ { uint32_t x155 = (x106 + x150);
+ { uint32_t x156 = (x155 >> 0x1a);
+ { uint32_t x157 = (x155 & 0x3ffffff);
+ out[0] = x157;
+ out[1] = (x156 + x112);
+ out[2] = x118;
+ out[3] = x124;
+ out[4] = x130;
+ out[5] = x136;
+ out[6] = x142;
+ out[7] = x148;
+ out[8] = x154;
+ out[9] = (x153 + x109);
+ out[10] = x115;
+ out[11] = x121;
+ out[12] = x127;
+ out[13] = x133;
+ out[14] = x139;
+ out[15] = x151;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e416m2e208m1/fesquare.c b/src/Specific/solinas32_2e416m2e208m1/fesquare.c
index 0b436503e..6499f63f0 100644
--- a/src/Specific/solinas32_2e416m2e208m1/fesquare.c
+++ b/src/Specific/solinas32_2e416m2e208m1/fesquare.c
@@ -1,131 +1,129 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16));
-{ uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
-{ uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
-{ uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
-{ uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
-{ uint64_t x36 = ((((uint64_t)(x6 + x22) * (x16 + x29)) + (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) - (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))));
-{ uint64_t x37 = ((((uint64_t)(x4 + x20) * (x16 + x29)) + (((uint64_t)(x6 + x22) * (x14 + x30)) + (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) - (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))));
-{ uint64_t x38 = ((((uint64_t)(x2 + x18) * (x16 + x29)) + (((uint64_t)(x4 + x20) * (x14 + x30)) + (((uint64_t)(x6 + x22) * (x12 + x28)) + (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) - (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))));
-{ uint64_t x39 = ((((uint64_t)(x2 + x18) * (x14 + x30)) + (((uint64_t)(x4 + x20) * (x12 + x28)) + (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
-{ uint64_t x40 = ((((uint64_t)(x2 + x18) * (x12 + x28)) + (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
-{ uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
-{ uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
-{ uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
-{ uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
-{ uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
-{ uint64_t x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) + x39) + x31);
-{ uint64_t x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) + x32);
-{ uint64_t x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) + x33);
-{ uint64_t x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) + x34);
-{ uint64_t x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) + x35);
-{ uint64_t x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) + x36);
-{ uint64_t x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) + x37);
-{ uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
-{ uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
-{ uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
-{ uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
-{ uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
-{ uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
-{ uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
-{ uint64_t x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) + x37);
-{ uint64_t x61 = (x53 >> 0x1a);
-{ uint32_t x62 = ((uint32_t)x53 & 0x3ffffff);
-{ uint64_t x63 = (x38 >> 0x1a);
-{ uint32_t x64 = ((uint32_t)x38 & 0x3ffffff);
-{ uint64_t x65 = ((0x4000000 * x63) + x64);
-{ uint64_t x66 = (x65 >> 0x1a);
-{ uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
-{ uint64_t x68 = ((x61 + x52) + x66);
-{ uint64_t x69 = (x68 >> 0x1a);
-{ uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
-{ uint64_t x71 = (x60 + x66);
-{ uint64_t x72 = (x71 >> 0x1a);
-{ uint32_t x73 = ((uint32_t)x71 & 0x3ffffff);
-{ uint64_t x74 = (x69 + x51);
-{ uint64_t x75 = (x74 >> 0x1a);
-{ uint32_t x76 = ((uint32_t)x74 & 0x3ffffff);
-{ uint64_t x77 = (x72 + x59);
-{ uint64_t x78 = (x77 >> 0x1a);
-{ uint32_t x79 = ((uint32_t)x77 & 0x3ffffff);
-{ uint64_t x80 = (x75 + x50);
-{ uint64_t x81 = (x80 >> 0x1a);
-{ uint32_t x82 = ((uint32_t)x80 & 0x3ffffff);
-{ uint64_t x83 = (x78 + x58);
-{ uint64_t x84 = (x83 >> 0x1a);
-{ uint32_t x85 = ((uint32_t)x83 & 0x3ffffff);
-{ uint64_t x86 = (x81 + x49);
-{ uint64_t x87 = (x86 >> 0x1a);
-{ uint32_t x88 = ((uint32_t)x86 & 0x3ffffff);
-{ uint64_t x89 = (x84 + x57);
-{ uint64_t x90 = (x89 >> 0x1a);
-{ uint32_t x91 = ((uint32_t)x89 & 0x3ffffff);
-{ uint64_t x92 = (x87 + x48);
-{ uint64_t x93 = (x92 >> 0x1a);
-{ uint32_t x94 = ((uint32_t)x92 & 0x3ffffff);
-{ uint64_t x95 = (x90 + x56);
-{ uint64_t x96 = (x95 >> 0x1a);
-{ uint32_t x97 = ((uint32_t)x95 & 0x3ffffff);
-{ uint64_t x98 = (x93 + x47);
-{ uint64_t x99 = (x98 >> 0x1a);
-{ uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
-{ uint64_t x101 = (x96 + x55);
-{ uint64_t x102 = (x101 >> 0x1a);
-{ uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
-{ uint64_t x104 = (x99 + x46);
-{ uint64_t x105 = (x104 >> 0x1a);
-{ uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
-{ uint64_t x107 = (x102 + x54);
-{ uint64_t x108 = (x107 >> 0x1a);
-{ uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
-{ uint64_t x110 = (x105 + x67);
-{ uint32_t x111 = (uint32_t) (x110 >> 0x1a);
-{ uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
-{ uint64_t x113 = (x108 + x62);
-{ uint32_t x114 = (uint32_t) (x113 >> 0x1a);
-{ uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
-{ uint64_t x116 = (((uint64_t)0x4000000 * x111) + x112);
-{ uint32_t x117 = (uint32_t) (x116 >> 0x1a);
-{ uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
-{ uint32_t x119 = ((x114 + x70) + x117);
-{ uint32_t x120 = (x119 >> 0x1a);
-{ uint32_t x121 = (x119 & 0x3ffffff);
-{ uint32_t x122 = (x73 + x117);
-{ uint32_t x123 = (x122 >> 0x1a);
-{ uint32_t x124 = (x122 & 0x3ffffff);
-out[0] = x118;
-out[1] = x106;
-out[2] = x100;
-out[3] = x94;
-out[4] = x88;
-out[5] = x82;
-out[6] = x120 + x76;
-out[7] = x121;
-out[8] = x115;
-out[9] = x109;
-out[10] = x103;
-out[11] = x97;
-out[12] = x91;
-out[13] = x85;
-out[14] = x123 + x79;
-out[15] = x124;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) { /* Squares the 16-limb value in1 and writes 16 carried limbs to out. The carry chain below splits accumulators at 26 bits (shift 0x1a, mask 0x3ffffff). NOTE(review): the directory name solinas32_2e416m2e208m1 suggests the reduction is modulo 2^416 - 2^208 - 1 -- confirm against the generator. */
+ { const uint32_t x29 = in1[15]; /* in1[8..15] (upper half) land in x18, x20, ..., x28, x30, x29; note that x29 is in1[15] and x30 is in1[14] */
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7]; /* in1[0..7] (lower half) land in x2, x4, ..., x16 */
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16)); /* x31..x45: cross-products of the (lower + upper) limb sums minus the matching cross-products of the lower limbs alone; the limb sums are formed in 32 bits before widening, so inputs must leave headroom (assumed bounded by the caller -- TODO confirm) */
+ { uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
+ { uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
+ { uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
+ { uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
+ { uint64_t x36 = ((((uint64_t)(x6 + x22) * (x16 + x29)) + (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) - (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))));
+ { uint64_t x37 = ((((uint64_t)(x4 + x20) * (x16 + x29)) + (((uint64_t)(x6 + x22) * (x14 + x30)) + (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) - (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))));
+ { uint64_t x38 = ((((uint64_t)(x2 + x18) * (x16 + x29)) + (((uint64_t)(x4 + x20) * (x14 + x30)) + (((uint64_t)(x6 + x22) * (x12 + x28)) + (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) - (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))));
+ { uint64_t x39 = ((((uint64_t)(x2 + x18) * (x14 + x30)) + (((uint64_t)(x4 + x20) * (x12 + x28)) + (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
+ { uint64_t x40 = ((((uint64_t)(x2 + x18) * (x12 + x28)) + (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
+ { uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+ { uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+ { uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+ { uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+ { uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
+ { uint64_t x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) + x39) + x31); /* x46..x52: lower-half products + upper-half products + two of the x31..x45 terms */
+ { uint64_t x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) + x32);
+ { uint64_t x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) + x33);
+ { uint64_t x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) + x34);
+ { uint64_t x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) + x35);
+ { uint64_t x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) + x36);
+ { uint64_t x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) + x37);
+ { uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))); /* x53: lower-half and upper-half products only, no x31..x45 term */
+ { uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31); /* x54..x60: lower-half products + upper-half products + one of x31..x37 */
+ { uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
+ { uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
+ { uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
+ { uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
+ { uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
+ { uint64_t x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) + x37);
+ { uint64_t x61 = (x53 >> 0x1a); /* from here on each accumulator is split into a carry (>> 26) and a 26-bit limb (& 0x3ffffff) */
+ { uint32_t x62 = ((uint32_t)x53 & 0x3ffffff);
+ { uint64_t x63 = (x38 >> 0x1a);
+ { uint32_t x64 = ((uint32_t)x38 & 0x3ffffff);
+ { uint64_t x65 = ((0x4000000 * x63) + x64); /* recombines x63 and x64, so x65 equals x38 again */
+ { uint64_t x66 = (x65 >> 0x1a);
+ { uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
+ { uint64_t x68 = ((x61 + x52) + x66); /* carry x66 is added here and also into x71; two carry chains then run side by side (x68 -> x110 and x71 -> x113) */
+ { uint64_t x69 = (x68 >> 0x1a);
+ { uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
+ { uint64_t x71 = (x60 + x66);
+ { uint64_t x72 = (x71 >> 0x1a);
+ { uint32_t x73 = ((uint32_t)x71 & 0x3ffffff);
+ { uint64_t x74 = (x69 + x51);
+ { uint64_t x75 = (x74 >> 0x1a);
+ { uint32_t x76 = ((uint32_t)x74 & 0x3ffffff);
+ { uint64_t x77 = (x72 + x59);
+ { uint64_t x78 = (x77 >> 0x1a);
+ { uint32_t x79 = ((uint32_t)x77 & 0x3ffffff);
+ { uint64_t x80 = (x75 + x50);
+ { uint64_t x81 = (x80 >> 0x1a);
+ { uint32_t x82 = ((uint32_t)x80 & 0x3ffffff);
+ { uint64_t x83 = (x78 + x58);
+ { uint64_t x84 = (x83 >> 0x1a);
+ { uint32_t x85 = ((uint32_t)x83 & 0x3ffffff);
+ { uint64_t x86 = (x81 + x49);
+ { uint64_t x87 = (x86 >> 0x1a);
+ { uint32_t x88 = ((uint32_t)x86 & 0x3ffffff);
+ { uint64_t x89 = (x84 + x57);
+ { uint64_t x90 = (x89 >> 0x1a);
+ { uint32_t x91 = ((uint32_t)x89 & 0x3ffffff);
+ { uint64_t x92 = (x87 + x48);
+ { uint64_t x93 = (x92 >> 0x1a);
+ { uint32_t x94 = ((uint32_t)x92 & 0x3ffffff);
+ { uint64_t x95 = (x90 + x56);
+ { uint64_t x96 = (x95 >> 0x1a);
+ { uint32_t x97 = ((uint32_t)x95 & 0x3ffffff);
+ { uint64_t x98 = (x93 + x47);
+ { uint64_t x99 = (x98 >> 0x1a);
+ { uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
+ { uint64_t x101 = (x96 + x55);
+ { uint64_t x102 = (x101 >> 0x1a);
+ { uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
+ { uint64_t x104 = (x99 + x46);
+ { uint64_t x105 = (x104 >> 0x1a);
+ { uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
+ { uint64_t x107 = (x102 + x54);
+ { uint64_t x108 = (x107 >> 0x1a);
+ { uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
+ { uint64_t x110 = (x105 + x67);
+ { uint32_t x111 = (uint32_t) (x110 >> 0x1a);
+ { uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
+ { uint64_t x113 = (x108 + x62);
+ { uint32_t x114 = (uint32_t) (x113 >> 0x1a);
+ { uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
+ { uint64_t x116 = (((uint64_t)0x4000000 * x111) + x112); /* recombines x111 and x112 before splitting once more into x117 / x118 */
+ { uint32_t x117 = (uint32_t) (x116 >> 0x1a);
+ { uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
+ { uint32_t x119 = ((x114 + x70) + x117); /* second pass: carry x117 is added here and also into x122 */
+ { uint32_t x120 = (x119 >> 0x1a);
+ { uint32_t x121 = (x119 & 0x3ffffff);
+ { uint32_t x122 = (x73 + x117);
+ { uint32_t x123 = (x122 >> 0x1a);
+ { uint32_t x124 = (x122 & 0x3ffffff);
+ out[0] = x124;
+ out[1] = (x123 + x79); /* last carry is added to the limb without re-masking */
+ out[2] = x85;
+ out[3] = x91;
+ out[4] = x97;
+ out[5] = x103;
+ out[6] = x109;
+ out[7] = x115;
+ out[8] = x121;
+ out[9] = (x120 + x76); /* last carry is added to the limb without re-masking */
+ out[10] = x82;
+ out[11] = x88;
+ out[12] = x94;
+ out[13] = x100;
+ out[14] = x106;
+ out[15] = x118;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e416m2e208m1/freeze.c b/src/Specific/solinas32_2e416m2e208m1/freeze.c
index 5de4dcf81..686158a18 100644
--- a/src/Specific/solinas32_2e416m2e208m1/freeze.c
+++ b/src/Specific/solinas32_2e416m2e208m1/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffff;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) { /* Reads 16 limbs from in1, runs a borrow-chained subtraction of per-limb constants, then a carry-chained add-back masked by x79, and stores the 16 add-back results in out. NOTE(review): this body is not compilable C as emitted -- the "uint32_t a, uint8_t b = Op (Syntax.SubWithGetBorrow ...) (...)" and "Op (Syntax.AddWithGetCarry ...)" lines look like unlowered pretty-printer output; confirm whether the generator is expected to emit real borrow/carry code here. */
+ { const uint32_t x29 = in1[15]; /* note that x29 is in1[15] and x30 is in1[14]; the remaining names descend with the index */
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffff); /* presumably (borrow-in, limb, constant) -> (26-bit difference, borrow-out); each following line takes the previous line's second result as its first argument -- verify the Op semantics against the generator */
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0x3ffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0x3ffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0x3ffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0x3ffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0x3ffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0x3ffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0x3ffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0x3fffffe); /* the in1[8] limb is the only one using 0x3fffffe; read as sixteen 26-bit limbs, the constants spell 2^416 - 2^208 - 1 */
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0x3ffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0x3ffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0x3ffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0x3ffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0x3ffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x3ffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x3ffffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff); /* cmovznz is not defined in this file; presumably yields 0 when the final borrow x78 is zero and 0xffffffff otherwise -- verify against its definition */
+ { uint32_t x80 = (x79 & 0x3ffffff); /* x80, x84, ..., x140: x79 masked with the same per-limb constants used in the subtraction above */
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80); /* presumably (carry-in, difference limb, masked constant) -> (26-bit sum, carry-out), chained like the subtraction -- verify */
+ { uint32_t x84 = (x79 & 0x3ffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0x3ffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0x3ffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0x3ffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0x3ffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0x3ffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0x3ffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0x3fffffe); /* matches the 0x3fffffe constant used for the in1[8] limb in the subtraction */
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0x3ffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0x3ffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0x3ffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0x3ffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0x3ffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0x3ffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0x3ffffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140); /* the second result of the last add is bound to "_" and never read */
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e448m2e224m1/femul.c b/src/Specific/solinas32_2e448m2e224m1/femul.c
index 3325c3e94..309e7b417 100644
--- a/src/Specific/solinas32_2e448m2e224m1/femul.c
+++ b/src/Specific/solinas32_2e448m2e224m1/femul.c
@@ -1,131 +1,145 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49));
-{ uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
-{ uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
-{ uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
-{ uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
-{ uint64_t x69 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x9 + x25) * (x49 + x62)) +ℤ (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + ((uint64_t)x19 * x55)))))) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
-{ uint64_t x70 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x7 + x23) * (x49 + x62)) +ℤ (((uint64_t)(x9 + x25) * (x47 + x63)) +ℤ (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) + (((uint64_t)x9 * x63) + (((uint64_t)x11 * x61) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + ((uint64_t)x19 * x53))))))) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + (((uint64_t)x33 * x39) + ((uint64_t)x32 * x37)))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
-{ ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x5 + x21) * (x49 + x62)) +ℤ (((uint64_t)(x7 + x23) * (x47 + x63)) +ℤ (((uint64_t)(x9 + x25) * (x45 + x61)) +ℤ (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) + (((uint64_t)x7 * x63) + (((uint64_t)x9 * x61) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((uint64_t)x19 * x51)))))))) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + (((uint64_t)x33 * x37) + ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
-{ uint64_t x72 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x47 + x63)) +ℤ (((uint64_t)(x7 + x23) * (x45 + x61)) +ℤ (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) + (((uint64_t)x7 * x61) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + ((uint64_t)x17 * x51))))))) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x33 * x35)))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))));
-{ uint64_t x73 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x45 + x61)) +ℤ (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + ((uint64_t)x15 * x51)))))) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35))))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))));
-{ uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
-{ uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
-{ uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
-{ uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
-{ uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
-{ ℤ x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64);
-{ ℤ x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) +ℤ x65);
-{ ℤ x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) +ℤ x66);
-{ ℤ x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) +ℤ x67);
-{ ℤ x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) +ℤ x68);
-{ ℤ x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) +ℤ x69);
-{ ℤ x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) +ℤ x70);
-{ uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
-{ uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
-{ uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
-{ uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
-{ uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
-{ uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
-{ uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
-{ ℤ x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) +ℤ x70);
-{ uint64_t x94 = (x86 >> 0x1c);
-{ uint32_t x95 = ((uint32_t)x86 & 0xfffffff);
-{ uint64_t x96 = (x71 >> 0x1c);
-{ uint32_t x97 = (x71 & 0xfffffff);
-{ ℤ x98 = ((0x10000000 *ℤ x96) +ℤ x97);
-{ uint64_t x99 = (x98 >> 0x1c);
-{ uint32_t x100 = (x98 & 0xfffffff);
-{ ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
-{ uint64_t x102 = (x101 >> 0x1c);
-{ uint32_t x103 = (x101 & 0xfffffff);
-{ ℤ x104 = (x93 +ℤ x99);
-{ uint64_t x105 = (x104 >> 0x1c);
-{ uint32_t x106 = (x104 & 0xfffffff);
-{ ℤ x107 = (x102 +ℤ x84);
-{ uint64_t x108 = (x107 >> 0x1c);
-{ uint32_t x109 = (x107 & 0xfffffff);
-{ uint64_t x110 = (x105 + x92);
-{ uint64_t x111 = (x110 >> 0x1c);
-{ uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
-{ ℤ x113 = (x108 +ℤ x83);
-{ uint64_t x114 = (x113 >> 0x1c);
-{ uint32_t x115 = (x113 & 0xfffffff);
-{ uint64_t x116 = (x111 + x91);
-{ uint64_t x117 = (x116 >> 0x1c);
-{ uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
-{ ℤ x119 = (x114 +ℤ x82);
-{ uint64_t x120 = (x119 >> 0x1c);
-{ uint32_t x121 = (x119 & 0xfffffff);
-{ uint64_t x122 = (x117 + x90);
-{ uint64_t x123 = (x122 >> 0x1c);
-{ uint32_t x124 = ((uint32_t)x122 & 0xfffffff);
-{ ℤ x125 = (x120 +ℤ x81);
-{ uint64_t x126 = (x125 >> 0x1c);
-{ uint32_t x127 = (x125 & 0xfffffff);
-{ uint64_t x128 = (x123 + x89);
-{ uint64_t x129 = (x128 >> 0x1c);
-{ uint32_t x130 = ((uint32_t)x128 & 0xfffffff);
-{ ℤ x131 = (x126 +ℤ x80);
-{ uint64_t x132 = (x131 >> 0x1c);
-{ uint32_t x133 = (x131 & 0xfffffff);
-{ uint64_t x134 = (x129 + x88);
-{ uint64_t x135 = (x134 >> 0x1c);
-{ uint32_t x136 = ((uint32_t)x134 & 0xfffffff);
-{ ℤ x137 = (x132 +ℤ x79);
-{ uint64_t x138 = (x137 >> 0x1c);
-{ uint32_t x139 = (x137 & 0xfffffff);
-{ uint64_t x140 = (x135 + x87);
-{ uint64_t x141 = (x140 >> 0x1c);
-{ uint32_t x142 = ((uint32_t)x140 & 0xfffffff);
-{ uint64_t x143 = (x138 + x100);
-{ uint32_t x144 = (uint32_t) (x143 >> 0x1c);
-{ uint32_t x145 = ((uint32_t)x143 & 0xfffffff);
-{ uint64_t x146 = (x141 + x95);
-{ uint32_t x147 = (uint32_t) (x146 >> 0x1c);
-{ uint32_t x148 = ((uint32_t)x146 & 0xfffffff);
-{ uint64_t x149 = (((uint64_t)0x10000000 * x144) + x145);
-{ uint32_t x150 = (uint32_t) (x149 >> 0x1c);
-{ uint32_t x151 = ((uint32_t)x149 & 0xfffffff);
-{ uint32_t x152 = ((x147 + x103) + x150);
-{ uint32_t x153 = (x152 >> 0x1c);
-{ uint32_t x154 = (x152 & 0xfffffff);
-{ uint32_t x155 = (x106 + x150);
-{ uint32_t x156 = (x155 >> 0x1c);
-{ uint32_t x157 = (x155 & 0xfffffff);
-out[0] = x151;
-out[1] = x139;
-out[2] = x133;
-out[3] = x127;
-out[4] = x121;
-out[5] = x115;
-out[6] = x153 + x109;
-out[7] = x154;
-out[8] = x148;
-out[9] = x142;
-out[10] = x136;
-out[11] = x130;
-out[12] = x124;
-out[13] = x118;
-out[14] = x156 + x112;
-out[15] = x157;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) { // Reads 16 limbs from each of in1 and in2 and writes 16 limbs to out. Presumably field multiplication for the prime named by the directory (2^448 - 2^224 - 1) -- TODO confirm.
+ { const uint32_t x32 = in1[15]; // in1 limbs are bound to locals from index 15 down to index 0
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15]; // in2 limbs, bound in the same descending order
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49)); // x64..x78: products of (limb[i] + limb[i+8]) sums minus the matching products of limbs 0..7. NOTE(review): looks like a Karatsuba-style split of each input into two 8-limb halves -- confirm.
+ { uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
+ { uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
+ { uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
+ { uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
+ { uint64_t x69 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x9 + x25) * (x49 + x62)) +ℤ (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + ((uint64_t)x19 * x55)))))) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55)))))))); // NOTE(review): "Op (Syntax.IdWithAlt ...)" and the ℤ-suffixed operators are not C; this line (and x70, x72, x73) will not compile as written
+ { uint64_t x70 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x7 + x23) * (x49 + x62)) +ℤ (((uint64_t)(x9 + x25) * (x47 + x63)) +ℤ (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) + (((uint64_t)x9 * x63) + (((uint64_t)x11 * x61) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + ((uint64_t)x19 * x53))))))) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + (((uint64_t)x33 * x39) + ((uint64_t)x32 * x37)))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ { ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x5 + x21) * (x49 + x62)) +ℤ (((uint64_t)(x7 + x23) * (x47 + x63)) +ℤ (((uint64_t)(x9 + x25) * (x45 + x61)) +ℤ (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) + (((uint64_t)x7 * x63) + (((uint64_t)x9 * x61) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((uint64_t)x19 * x51)))))))) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + (((uint64_t)x33 * x37) + ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))))); // NOTE(review): ℤ is not a C type; presumably the generator could not pick a fixed-width type for x71 -- confirm before treating this file as compilable C
+ { uint64_t x72 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x47 + x63)) +ℤ (((uint64_t)(x7 + x23) * (x45 + x61)) +ℤ (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) + (((uint64_t)x7 * x61) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + ((uint64_t)x17 * x51))))))) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x33 * x35)))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))));
+ { uint64_t x73 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x45 + x61)) +ℤ (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + ((uint64_t)x15 * x51)))))) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35))))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))));
+ { uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
+ { uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
+ { uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
+ { uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
+ { uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
+ { ℤ x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64); // x79..x93: sums of plain limb products, most of them with one or two of the x64..x78 terms added in
+ { ℤ x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) +ℤ x65);
+ { ℤ x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) +ℤ x66);
+ { ℤ x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) +ℤ x67);
+ { ℤ x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) +ℤ x68);
+ { ℤ x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) +ℤ x69);
+ { ℤ x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) +ℤ x70);
+ { uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
+ { uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
+ { uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
+ { uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
+ { uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
+ { uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
+ { uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
+ { ℤ x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) +ℤ x70);
+ { uint64_t x94 = (x86 >> 0x1c); // from here on, values are split into (v >> 28) and (v & 0xfffffff); the shifted part is added into a later sum
+ { uint32_t x95 = ((uint32_t)x86 & 0xfffffff);
+ { uint64_t x96 = (x71 >> 0x1c);
+ { uint32_t x97 = (x71 & 0xfffffff);
+ { ℤ x98 = ((0x10000000 *ℤ x96) +ℤ x97);
+ { uint64_t x99 = (x98 >> 0x1c);
+ { uint32_t x100 = (x98 & 0xfffffff);
+ { ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
+ { uint64_t x102 = (x101 >> 0x1c);
+ { uint32_t x103 = (x101 & 0xfffffff);
+ { ℤ x104 = (x93 +ℤ x99);
+ { uint64_t x105 = (x104 >> 0x1c);
+ { uint32_t x106 = (x104 & 0xfffffff);
+ { ℤ x107 = (x102 +ℤ x84);
+ { uint64_t x108 = (x107 >> 0x1c);
+ { uint32_t x109 = (x107 & 0xfffffff);
+ { uint64_t x110 = (x105 + x92);
+ { uint64_t x111 = (x110 >> 0x1c);
+ { uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
+ { ℤ x113 = (x108 +ℤ x83);
+ { uint64_t x114 = (x113 >> 0x1c);
+ { uint32_t x115 = (x113 & 0xfffffff);
+ { uint64_t x116 = (x111 + x91);
+ { uint64_t x117 = (x116 >> 0x1c);
+ { uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
+ { ℤ x119 = (x114 +ℤ x82);
+ { uint64_t x120 = (x119 >> 0x1c);
+ { uint32_t x121 = (x119 & 0xfffffff);
+ { uint64_t x122 = (x117 + x90);
+ { uint64_t x123 = (x122 >> 0x1c);
+ { uint32_t x124 = ((uint32_t)x122 & 0xfffffff);
+ { ℤ x125 = (x120 +ℤ x81);
+ { uint64_t x126 = (x125 >> 0x1c);
+ { uint32_t x127 = (x125 & 0xfffffff);
+ { uint64_t x128 = (x123 + x89);
+ { uint64_t x129 = (x128 >> 0x1c);
+ { uint32_t x130 = ((uint32_t)x128 & 0xfffffff);
+ { ℤ x131 = (x126 +ℤ x80);
+ { uint64_t x132 = (x131 >> 0x1c);
+ { uint32_t x133 = (x131 & 0xfffffff);
+ { uint64_t x134 = (x129 + x88);
+ { uint64_t x135 = (x134 >> 0x1c);
+ { uint32_t x136 = ((uint32_t)x134 & 0xfffffff);
+ { ℤ x137 = (x132 +ℤ x79);
+ { uint64_t x138 = (x137 >> 0x1c);
+ { uint32_t x139 = (x137 & 0xfffffff);
+ { uint64_t x140 = (x135 + x87);
+ { uint64_t x141 = (x140 >> 0x1c);
+ { uint32_t x142 = ((uint32_t)x140 & 0xfffffff);
+ { uint64_t x143 = (x138 + x100);
+ { uint32_t x144 = (uint32_t) (x143 >> 0x1c);
+ { uint32_t x145 = ((uint32_t)x143 & 0xfffffff);
+ { uint64_t x146 = (x141 + x95);
+ { uint32_t x147 = (uint32_t) (x146 >> 0x1c);
+ { uint32_t x148 = ((uint32_t)x146 & 0xfffffff);
+ { uint64_t x149 = (((uint64_t)0x10000000 * x144) + x145);
+ { uint32_t x150 = (uint32_t) (x149 >> 0x1c);
+ { uint32_t x151 = ((uint32_t)x149 & 0xfffffff);
+ { uint32_t x152 = ((x147 + x103) + x150);
+ { uint32_t x153 = (x152 >> 0x1c);
+ { uint32_t x154 = (x152 & 0xfffffff);
+ { uint32_t x155 = (x106 + x150);
+ { uint32_t x156 = (x155 >> 0x1c);
+ { uint32_t x157 = (x155 & 0xfffffff);
+ out[0] = x157;
+ out[1] = (x156 + x112); // the shifted-out part of x155 is added to x112 here and the sum is not masked again
+ out[2] = x118;
+ out[3] = x124;
+ out[4] = x130;
+ out[5] = x136;
+ out[6] = x142;
+ out[7] = x148;
+ out[8] = x154;
+ out[9] = (x153 + x109); // likewise: the shifted-out part of x152 is added to x109 with no further masking
+ out[10] = x115;
+ out[11] = x121;
+ out[12] = x127;
+ out[13] = x133;
+ out[14] = x139;
+ out[15] = x151;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e448m2e224m1/fesquare.c b/src/Specific/solinas32_2e448m2e224m1/fesquare.c
index 26103564e..5f1a92651 100644
--- a/src/Specific/solinas32_2e448m2e224m1/fesquare.c
+++ b/src/Specific/solinas32_2e448m2e224m1/fesquare.c
@@ -1,131 +1,129 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16));
-{ uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
-{ uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
-{ uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
-{ uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
-{ uint64_t x36 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x6 + x22) * (x16 + x29)) +ℤ (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + ((uint64_t)x16 * x22)))))) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
-{ uint64_t x37 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x4 + x20) * (x16 + x29)) +ℤ (((uint64_t)(x6 + x22) * (x14 + x30)) +ℤ (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) + (((uint64_t)x6 * x30) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((uint64_t)x16 * x20))))))) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + (((uint64_t)x30 * x6) + ((uint64_t)x29 * x4)))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
-{ ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x2 + x18) * (x16 + x29)) +ℤ (((uint64_t)(x4 + x20) * (x14 + x30)) +ℤ (((uint64_t)(x6 + x22) * (x12 + x28)) +ℤ (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) + (((uint64_t)x4 * x30) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((uint64_t)x16 * x18)))))))) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + (((uint64_t)x30 * x4) + ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
-{ uint64_t x39 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x14 + x30)) +ℤ (((uint64_t)(x4 + x20) * (x12 + x28)) +ℤ (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + ((uint64_t)x14 * x18))))))) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x30 * x2)))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))));
-{ uint64_t x40 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x12 + x28)) +ℤ (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + ((uint64_t)x12 * x18)))))) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2))))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))));
-{ uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
-{ uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
-{ uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
-{ uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
-{ uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
-{ ℤ x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
-{ ℤ x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) +ℤ x32);
-{ ℤ x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) +ℤ x33);
-{ ℤ x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) +ℤ x34);
-{ ℤ x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) +ℤ x35);
-{ ℤ x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) +ℤ x36);
-{ ℤ x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) +ℤ x37);
-{ uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
-{ uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
-{ uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
-{ uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
-{ uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
-{ uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
-{ uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
-{ ℤ x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) +ℤ x37);
-{ uint64_t x61 = (x53 >> 0x1c);
-{ uint32_t x62 = ((uint32_t)x53 & 0xfffffff);
-{ uint64_t x63 = (x38 >> 0x1c);
-{ uint32_t x64 = (x38 & 0xfffffff);
-{ ℤ x65 = ((0x10000000 *ℤ x63) +ℤ x64);
-{ uint64_t x66 = (x65 >> 0x1c);
-{ uint32_t x67 = (x65 & 0xfffffff);
-{ ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
-{ uint64_t x69 = (x68 >> 0x1c);
-{ uint32_t x70 = (x68 & 0xfffffff);
-{ ℤ x71 = (x60 +ℤ x66);
-{ uint64_t x72 = (x71 >> 0x1c);
-{ uint32_t x73 = (x71 & 0xfffffff);
-{ ℤ x74 = (x69 +ℤ x51);
-{ uint64_t x75 = (x74 >> 0x1c);
-{ uint32_t x76 = (x74 & 0xfffffff);
-{ uint64_t x77 = (x72 + x59);
-{ uint64_t x78 = (x77 >> 0x1c);
-{ uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
-{ ℤ x80 = (x75 +ℤ x50);
-{ uint64_t x81 = (x80 >> 0x1c);
-{ uint32_t x82 = (x80 & 0xfffffff);
-{ uint64_t x83 = (x78 + x58);
-{ uint64_t x84 = (x83 >> 0x1c);
-{ uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
-{ ℤ x86 = (x81 +ℤ x49);
-{ uint64_t x87 = (x86 >> 0x1c);
-{ uint32_t x88 = (x86 & 0xfffffff);
-{ uint64_t x89 = (x84 + x57);
-{ uint64_t x90 = (x89 >> 0x1c);
-{ uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
-{ ℤ x92 = (x87 +ℤ x48);
-{ uint64_t x93 = (x92 >> 0x1c);
-{ uint32_t x94 = (x92 & 0xfffffff);
-{ uint64_t x95 = (x90 + x56);
-{ uint64_t x96 = (x95 >> 0x1c);
-{ uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
-{ ℤ x98 = (x93 +ℤ x47);
-{ uint64_t x99 = (x98 >> 0x1c);
-{ uint32_t x100 = (x98 & 0xfffffff);
-{ uint64_t x101 = (x96 + x55);
-{ uint64_t x102 = (x101 >> 0x1c);
-{ uint32_t x103 = ((uint32_t)x101 & 0xfffffff);
-{ ℤ x104 = (x99 +ℤ x46);
-{ uint64_t x105 = (x104 >> 0x1c);
-{ uint32_t x106 = (x104 & 0xfffffff);
-{ uint64_t x107 = (x102 + x54);
-{ uint64_t x108 = (x107 >> 0x1c);
-{ uint32_t x109 = ((uint32_t)x107 & 0xfffffff);
-{ uint64_t x110 = (x105 + x67);
-{ uint32_t x111 = (uint32_t) (x110 >> 0x1c);
-{ uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
-{ uint64_t x113 = (x108 + x62);
-{ uint32_t x114 = (uint32_t) (x113 >> 0x1c);
-{ uint32_t x115 = ((uint32_t)x113 & 0xfffffff);
-{ uint64_t x116 = (((uint64_t)0x10000000 * x111) + x112);
-{ uint32_t x117 = (uint32_t) (x116 >> 0x1c);
-{ uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
-{ uint32_t x119 = ((x114 + x70) + x117);
-{ uint32_t x120 = (x119 >> 0x1c);
-{ uint32_t x121 = (x119 & 0xfffffff);
-{ uint32_t x122 = (x73 + x117);
-{ uint32_t x123 = (x122 >> 0x1c);
-{ uint32_t x124 = (x122 & 0xfffffff);
-out[0] = x118;
-out[1] = x106;
-out[2] = x100;
-out[3] = x94;
-out[4] = x88;
-out[5] = x82;
-out[6] = x120 + x76;
-out[7] = x121;
-out[8] = x115;
-out[9] = x109;
-out[10] = x103;
-out[11] = x97;
-out[12] = x91;
-out[13] = x85;
-out[14] = x123 + x79;
-out[15] = x124;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16));
+ { uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
+ { uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
+ { uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
+ { uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
+ { uint64_t x36 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x6 + x22) * (x16 + x29)) +ℤ (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + ((uint64_t)x16 * x22)))))) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ { uint64_t x37 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x4 + x20) * (x16 + x29)) +ℤ (((uint64_t)(x6 + x22) * (x14 + x30)) +ℤ (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) + (((uint64_t)x6 * x30) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((uint64_t)x16 * x20))))))) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + (((uint64_t)x30 * x6) + ((uint64_t)x29 * x4)))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ { ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x2 + x18) * (x16 + x29)) +ℤ (((uint64_t)(x4 + x20) * (x14 + x30)) +ℤ (((uint64_t)(x6 + x22) * (x12 + x28)) +ℤ (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) + (((uint64_t)x4 * x30) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((uint64_t)x16 * x18)))))))) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + (((uint64_t)x30 * x4) + ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ { uint64_t x39 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x14 + x30)) +ℤ (((uint64_t)(x4 + x20) * (x12 + x28)) +ℤ (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + ((uint64_t)x14 * x18))))))) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x30 * x2)))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))));
+ { uint64_t x40 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x12 + x28)) +ℤ (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + ((uint64_t)x12 * x18)))))) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2))))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))));
+ { uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+ { uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+ { uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+ { uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+ { uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
+ { ℤ x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
+ { ℤ x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) +ℤ x32);
+ { ℤ x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) +ℤ x33);
+ { ℤ x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) +ℤ x34);
+ { ℤ x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) +ℤ x35);
+ { ℤ x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) +ℤ x36);
+ { ℤ x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) +ℤ x37);
+ { uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
+ { uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
+ { uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
+ { uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
+ { uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
+ { uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
+ { uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
+ { ℤ x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) +ℤ x37);
+ { uint64_t x61 = (x53 >> 0x1c);
+ { uint32_t x62 = ((uint32_t)x53 & 0xfffffff);
+ { uint64_t x63 = (x38 >> 0x1c);
+ { uint32_t x64 = (x38 & 0xfffffff);
+ { ℤ x65 = ((0x10000000 *ℤ x63) +ℤ x64);
+ { uint64_t x66 = (x65 >> 0x1c);
+ { uint32_t x67 = (x65 & 0xfffffff);
+ { ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
+ { uint64_t x69 = (x68 >> 0x1c);
+ { uint32_t x70 = (x68 & 0xfffffff);
+ { ℤ x71 = (x60 +ℤ x66);
+ { uint64_t x72 = (x71 >> 0x1c);
+ { uint32_t x73 = (x71 & 0xfffffff);
+ { ℤ x74 = (x69 +ℤ x51);
+ { uint64_t x75 = (x74 >> 0x1c);
+ { uint32_t x76 = (x74 & 0xfffffff);
+ { uint64_t x77 = (x72 + x59);
+ { uint64_t x78 = (x77 >> 0x1c);
+ { uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
+ { ℤ x80 = (x75 +ℤ x50);
+ { uint64_t x81 = (x80 >> 0x1c);
+ { uint32_t x82 = (x80 & 0xfffffff);
+ { uint64_t x83 = (x78 + x58);
+ { uint64_t x84 = (x83 >> 0x1c);
+ { uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
+ { ℤ x86 = (x81 +ℤ x49);
+ { uint64_t x87 = (x86 >> 0x1c);
+ { uint32_t x88 = (x86 & 0xfffffff);
+ { uint64_t x89 = (x84 + x57);
+ { uint64_t x90 = (x89 >> 0x1c);
+ { uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
+ { ℤ x92 = (x87 +ℤ x48);
+ { uint64_t x93 = (x92 >> 0x1c);
+ { uint32_t x94 = (x92 & 0xfffffff);
+ { uint64_t x95 = (x90 + x56);
+ { uint64_t x96 = (x95 >> 0x1c);
+ { uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
+ { ℤ x98 = (x93 +ℤ x47);
+ { uint64_t x99 = (x98 >> 0x1c);
+ { uint32_t x100 = (x98 & 0xfffffff);
+ { uint64_t x101 = (x96 + x55);
+ { uint64_t x102 = (x101 >> 0x1c);
+ { uint32_t x103 = ((uint32_t)x101 & 0xfffffff);
+ { ℤ x104 = (x99 +ℤ x46);
+ { uint64_t x105 = (x104 >> 0x1c);
+ { uint32_t x106 = (x104 & 0xfffffff);
+ { uint64_t x107 = (x102 + x54);
+ { uint64_t x108 = (x107 >> 0x1c);
+ { uint32_t x109 = ((uint32_t)x107 & 0xfffffff);
+ { uint64_t x110 = (x105 + x67);
+ { uint32_t x111 = (uint32_t) (x110 >> 0x1c);
+ { uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
+ { uint64_t x113 = (x108 + x62);
+ { uint32_t x114 = (uint32_t) (x113 >> 0x1c);
+ { uint32_t x115 = ((uint32_t)x113 & 0xfffffff);
+ { uint64_t x116 = (((uint64_t)0x10000000 * x111) + x112);
+ { uint32_t x117 = (uint32_t) (x116 >> 0x1c);
+ { uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
+ { uint32_t x119 = ((x114 + x70) + x117);
+ { uint32_t x120 = (x119 >> 0x1c);
+ { uint32_t x121 = (x119 & 0xfffffff);
+ { uint32_t x122 = (x73 + x117);
+ { uint32_t x123 = (x122 >> 0x1c);
+ { uint32_t x124 = (x122 & 0xfffffff);
+ out[0] = x124;
+ out[1] = (x123 + x79);
+ out[2] = x85;
+ out[3] = x91;
+ out[4] = x97;
+ out[5] = x103;
+ out[6] = x109;
+ out[7] = x115;
+ out[8] = x121;
+ out[9] = (x120 + x76);
+ out[10] = x82;
+ out[11] = x88;
+ out[12] = x94;
+ out[13] = x100;
+ out[14] = x106;
+ out[15] = x118;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e448m2e224m1/freeze.c b/src/Specific/solinas32_2e448m2e224m1/freeze.c
index 40dcf60a9..9077bb4bd 100644
--- a/src/Specific/solinas32_2e448m2e224m1/freeze.c
+++ b/src/Specific/solinas32_2e448m2e224m1/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffff;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xfffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xfffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xfffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xfffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xfffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xfffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xfffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xffffffe);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xfffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xfffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xfffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xfffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xfffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xfffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xfffffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0xfffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0xfffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0xfffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0xfffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0xfffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0xfffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0xfffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0xfffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0xffffffe);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0xfffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0xfffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0xfffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0xfffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0xfffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0xfffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0xfffffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e450m2e225m1/freeze.c b/src/Specific/solinas32_2e450m2e225m1/freeze.c
index 1e830873a..66e75b139 100644
--- a/src/Specific/solinas32_2e450m2e225m1/freeze.c
+++ b/src/Specific/solinas32_2e450m2e225m1/freeze.c
@@ -1,25 +1,79 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x30;
-out[1] = uint8_t x31 = Op Syntax.SubWithGetBorrow 30 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffff;;
+static void freeze(uint32_t out[15], const uint32_t in1[15]) {
+ { const uint32_t x27 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffff);
+ { uint32_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x31, Return x4, 0x3fffffff);
+ { uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x34, Return x6, 0x3fffffff);
+ { uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x8, 0x3fffffff);
+ { uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x10, 0x3fffffff);
+ { uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x12, 0x3fffffff);
+ { uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x14, 0x3fffffff);
+ { uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x16, 0x3fff7fff);
+ { uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x18, 0x3fffffff);
+ { uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x20, 0x3fffffff);
+ { uint32_t x60, uint8_t x61 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x22, 0x3fffffff);
+ { uint32_t x63, uint8_t x64 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x24, 0x3fffffff);
+ { uint32_t x66, uint8_t x67 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x26, 0x3fffffff);
+ { uint32_t x69, uint8_t x70 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x28, 0x3fffffff);
+ { uint32_t x72, uint8_t x73 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x70, Return x27, 0x3fffffff);
+ { uint32_t x74 = (uint32_t)cmovznz(x73, 0x0, 0xffffffff);
+ { uint32_t x75 = (x74 & 0x3fffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x30, Return x75);
+ { uint32_t x79 = (x74 & 0x3fffffff);
+ { uint32_t x81, uint8_t x82 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x78, Return x33, Return x79);
+ { uint32_t x83 = (x74 & 0x3fffffff);
+ { uint32_t x85, uint8_t x86 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x36, Return x83);
+ { uint32_t x87 = (x74 & 0x3fffffff);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x86, Return x39, Return x87);
+ { uint32_t x91 = (x74 & 0x3fffffff);
+ { uint32_t x93, uint8_t x94 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x42, Return x91);
+ { uint32_t x95 = (x74 & 0x3fffffff);
+ { uint32_t x97, uint8_t x98 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x94, Return x45, Return x95);
+ { uint32_t x99 = (x74 & 0x3fffffff);
+ { uint32_t x101, uint8_t x102 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x98, Return x48, Return x99);
+ { uint32_t x103 = (x74 & 0x3fff7fff);
+ { uint32_t x105, uint8_t x106 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x102, Return x51, Return x103);
+ { uint32_t x107 = (x74 & 0x3fffffff);
+ { uint32_t x109, uint8_t x110 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x106, Return x54, Return x107);
+ { uint32_t x111 = (x74 & 0x3fffffff);
+ { uint32_t x113, uint8_t x114 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x110, Return x57, Return x111);
+ { uint32_t x115 = (x74 & 0x3fffffff);
+ { uint32_t x117, uint8_t x118 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x114, Return x60, Return x115);
+ { uint32_t x119 = (x74 & 0x3fffffff);
+ { uint32_t x121, uint8_t x122 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x118, Return x63, Return x119);
+ { uint32_t x123 = (x74 & 0x3fffffff);
+ { uint32_t x125, uint8_t x126 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x122, Return x66, Return x123);
+ { uint32_t x127 = (x74 & 0x3fffffff);
+ { uint32_t x129, uint8_t x130 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x126, Return x69, Return x127);
+ { uint32_t x131 = (x74 & 0x3fffffff);
+ { uint32_t x133, uint8_t _ = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x130, Return x72, Return x131);
+ out[0] = x77;
+ out[1] = x81;
+ out[2] = x85;
+ out[3] = x89;
+ out[4] = x93;
+ out[5] = x97;
+ out[6] = x101;
+ out[7] = x105;
+ out[8] = x109;
+ out[9] = x113;
+ out[10] = x117;
+ out[11] = x121;
+ out[12] = x125;
+ out[13] = x129;
+ out[14] = x133;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e452m3/femul.c b/src/Specific/solinas32_2e452m3/femul.c
index fd3b61467..97c535118 100644
--- a/src/Specific/solinas32_2e452m3/femul.c
+++ b/src/Specific/solinas32_2e452m3/femul.c
@@ -1,106 +1,120 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ ℤ x64 = (((uint64_t)x5 * x62) +ℤ ((0x2 * ((uint64_t)x7 * x63)) +ℤ ((0x2 * ((uint64_t)x9 * x61)) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
-{ ℤ x65 = ((((uint64_t)x5 * x63) +ℤ ((0x2 * ((uint64_t)x7 * x61)) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + ((0x2 * ((uint64_t)x15 * x53)) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) +ℤ (0x3 * ((uint64_t)x32 * x62)));
-{ ℤ x66 = ((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35)))))))))))))) +ℤ (0x3 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
-{ ℤ x67 = ((((uint64_t)x5 * x59) +ℤ ((0x2 * ((uint64_t)x7 * x57)) +ℤ ((0x2 * ((uint64_t)x9 * x55)) +ℤ ((0x2 * ((uint64_t)x11 * x53)) +ℤ (((uint64_t)x13 * x51) +ℤ ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x31 * x62)) + ((0x2 * ((uint64_t)x33 * x63)) + (0x2 * ((uint64_t)x32 * x61))))));
-{ ℤ x68 = ((((uint64_t)x5 * x57) +ℤ ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x29 * x62) + ((0x2 * ((uint64_t)x31 * x63)) + ((0x2 * ((uint64_t)x33 * x61)) + ((uint64_t)x32 * x59))))));
-{ ℤ x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + ((0x2 * ((uint64_t)x31 * x61)) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
-{ ℤ x70 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x23 * x35)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
-{ ℤ x71 = ((((uint64_t)x5 * x51) +ℤ ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x23 * x62)) + ((0x2 * ((uint64_t)x25 * x63)) + ((0x2 * ((uint64_t)x27 * x61)) + (((uint64_t)x29 * x59) + ((0x2 * ((uint64_t)x31 * x57)) + ((0x2 * ((uint64_t)x33 * x55)) + (0x2 * ((uint64_t)x32 * x53))))))))));
-{ ℤ x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) +ℤ (0x3 *ℤ (((uint64_t)x21 * x62) + ((0x2 * ((uint64_t)x23 * x63)) + ((0x2 * ((uint64_t)x25 * x61)) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + ((0x2 * ((uint64_t)x31 * x55)) + ((0x2 * ((uint64_t)x33 * x53)) + ((uint64_t)x32 * x51))))))))));
-{ ℤ x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) +ℤ (0x3 *ℤ (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + ((0x2 * ((uint64_t)x23 * x61)) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + ((0x2 * ((uint64_t)x31 * x53)) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
-{ ℤ x74 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) +ℤ (0x3 *ℤ (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
-{ ℤ x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x15 * x62)) +ℤ ((0x2 * ((uint64_t)x17 * x63)) +ℤ ((0x2 * ((uint64_t)x19 * x61)) + (((uint64_t)x21 * x59) + ((0x2 * ((uint64_t)x23 * x57)) + ((0x2 * ((uint64_t)x25 * x55)) + ((0x2 * ((uint64_t)x27 * x53)) + (((uint64_t)x29 * x51) + ((0x2 * ((uint64_t)x31 * x49)) + ((0x2 * ((uint64_t)x33 * x47)) + (0x2 * ((uint64_t)x32 * x45))))))))))))));
-{ ℤ x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x62) +ℤ ((0x2 * ((uint64_t)x15 * x63)) + ((0x2 * ((uint64_t)x17 * x61)) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + ((0x2 * ((uint64_t)x23 * x55)) + ((0x2 * ((uint64_t)x25 * x53)) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((0x2 * ((uint64_t)x31 * x47)) + ((0x2 * ((uint64_t)x33 * x45)) + ((uint64_t)x32 * x43))))))))))))));
-{ ℤ x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + ((0x2 * ((uint64_t)x15 * x61)) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + ((0x2 * ((uint64_t)x23 * x53)) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((0x2 * ((uint64_t)x31 * x45)) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
-{ ℤ x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
-{ ℤ x79 = (((uint64_t)x5 * x35) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x7 * x62)) +ℤ ((0x2 * ((uint64_t)x9 * x63)) +ℤ ((0x2 * ((uint64_t)x11 * x61)) +ℤ (((uint64_t)x13 * x59) +ℤ ((0x2 * ((uint64_t)x15 * x57)) +ℤ ((0x2 * ((uint64_t)x17 * x55)) +ℤ ((0x2 * ((uint64_t)x19 * x53)) + (((uint64_t)x21 * x51) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + (((uint64_t)x29 * x43) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
-{ uint64_t x80 = (x79 >> 0x1d);
-{ uint32_t x81 = (x79 & 0x1fffffff);
-{ ℤ x82 = (x80 +ℤ x78);
-{ uint64_t x83 = (x82 >> 0x1c);
-{ uint32_t x84 = (x82 & 0xfffffff);
-{ ℤ x85 = (x83 +ℤ x77);
-{ uint64_t x86 = (x85 >> 0x1c);
-{ uint32_t x87 = (x85 & 0xfffffff);
-{ ℤ x88 = (x86 +ℤ x76);
-{ uint64_t x89 = (x88 >> 0x1c);
-{ uint32_t x90 = (x88 & 0xfffffff);
-{ ℤ x91 = (x89 +ℤ x75);
-{ uint64_t x92 = (x91 >> 0x1d);
-{ uint32_t x93 = (x91 & 0x1fffffff);
-{ ℤ x94 = (x92 +ℤ x74);
-{ uint64_t x95 = (x94 >> 0x1c);
-{ uint32_t x96 = (x94 & 0xfffffff);
-{ ℤ x97 = (x95 +ℤ x73);
-{ uint64_t x98 = (x97 >> 0x1c);
-{ uint32_t x99 = (x97 & 0xfffffff);
-{ ℤ x100 = (x98 +ℤ x72);
-{ uint64_t x101 = (x100 >> 0x1c);
-{ uint32_t x102 = (x100 & 0xfffffff);
-{ ℤ x103 = (x101 +ℤ x71);
-{ uint64_t x104 = (x103 >> 0x1d);
-{ uint32_t x105 = (x103 & 0x1fffffff);
-{ ℤ x106 = (x104 +ℤ x70);
-{ uint64_t x107 = (x106 >> 0x1c);
-{ uint32_t x108 = (x106 & 0xfffffff);
-{ ℤ x109 = (x107 +ℤ x69);
-{ uint64_t x110 = (x109 >> 0x1c);
-{ uint32_t x111 = (x109 & 0xfffffff);
-{ ℤ x112 = (x110 +ℤ x68);
-{ uint64_t x113 = (x112 >> 0x1c);
-{ uint32_t x114 = (x112 & 0xfffffff);
-{ ℤ x115 = (x113 +ℤ x67);
-{ uint64_t x116 = (x115 >> 0x1d);
-{ uint32_t x117 = (x115 & 0x1fffffff);
-{ ℤ x118 = (x116 +ℤ x66);
-{ uint64_t x119 = (x118 >> 0x1c);
-{ uint32_t x120 = (x118 & 0xfffffff);
-{ ℤ x121 = (x119 +ℤ x65);
-{ uint64_t x122 = (x121 >> 0x1c);
-{ uint32_t x123 = (x121 & 0xfffffff);
-{ ℤ x124 = (x122 +ℤ x64);
-{ uint64_t x125 = (x124 >> 0x1c);
-{ uint32_t x126 = (x124 & 0xfffffff);
-{ uint64_t x127 = (x81 + (0x3 * x125));
-{ uint32_t x128 = (uint32_t) (x127 >> 0x1d);
-{ uint32_t x129 = ((uint32_t)x127 & 0x1fffffff);
-{ uint32_t x130 = (x128 + x84);
-{ uint32_t x131 = (x130 >> 0x1c);
-{ uint32_t x132 = (x130 & 0xfffffff);
-out[0] = x126;
-out[1] = x123;
-out[2] = x120;
-out[3] = x117;
-out[4] = x114;
-out[5] = x111;
-out[6] = x108;
-out[7] = x105;
-out[8] = x102;
-out[9] = x99;
-out[10] = x96;
-out[11] = x93;
-out[12] = x90;
-out[13] = x131 + x87;
-out[14] = x132;
-out[15] = x129;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { ℤ x64 = (((uint64_t)x5 * x62) +ℤ ((0x2 * ((uint64_t)x7 * x63)) +ℤ ((0x2 * ((uint64_t)x9 * x61)) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ { ℤ x65 = ((((uint64_t)x5 * x63) +ℤ ((0x2 * ((uint64_t)x7 * x61)) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + ((0x2 * ((uint64_t)x15 * x53)) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) +ℤ (0x3 * ((uint64_t)x32 * x62)));
+ { ℤ x66 = ((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35)))))))))))))) +ℤ (0x3 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ { ℤ x67 = ((((uint64_t)x5 * x59) +ℤ ((0x2 * ((uint64_t)x7 * x57)) +ℤ ((0x2 * ((uint64_t)x9 * x55)) +ℤ ((0x2 * ((uint64_t)x11 * x53)) +ℤ (((uint64_t)x13 * x51) +ℤ ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x31 * x62)) + ((0x2 * ((uint64_t)x33 * x63)) + (0x2 * ((uint64_t)x32 * x61))))));
+ { ℤ x68 = ((((uint64_t)x5 * x57) +ℤ ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x29 * x62) + ((0x2 * ((uint64_t)x31 * x63)) + ((0x2 * ((uint64_t)x33 * x61)) + ((uint64_t)x32 * x59))))));
+ { ℤ x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + ((0x2 * ((uint64_t)x31 * x61)) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ { ℤ x70 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x23 * x35)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ { ℤ x71 = ((((uint64_t)x5 * x51) +ℤ ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x23 * x62)) + ((0x2 * ((uint64_t)x25 * x63)) + ((0x2 * ((uint64_t)x27 * x61)) + (((uint64_t)x29 * x59) + ((0x2 * ((uint64_t)x31 * x57)) + ((0x2 * ((uint64_t)x33 * x55)) + (0x2 * ((uint64_t)x32 * x53))))))))));
+ { ℤ x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) +ℤ (0x3 *ℤ (((uint64_t)x21 * x62) + ((0x2 * ((uint64_t)x23 * x63)) + ((0x2 * ((uint64_t)x25 * x61)) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + ((0x2 * ((uint64_t)x31 * x55)) + ((0x2 * ((uint64_t)x33 * x53)) + ((uint64_t)x32 * x51))))))))));
+ { ℤ x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) +ℤ (0x3 *ℤ (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + ((0x2 * ((uint64_t)x23 * x61)) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + ((0x2 * ((uint64_t)x31 * x53)) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ { ℤ x74 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) +ℤ (0x3 *ℤ (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ { ℤ x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x15 * x62)) +ℤ ((0x2 * ((uint64_t)x17 * x63)) +ℤ ((0x2 * ((uint64_t)x19 * x61)) + (((uint64_t)x21 * x59) + ((0x2 * ((uint64_t)x23 * x57)) + ((0x2 * ((uint64_t)x25 * x55)) + ((0x2 * ((uint64_t)x27 * x53)) + (((uint64_t)x29 * x51) + ((0x2 * ((uint64_t)x31 * x49)) + ((0x2 * ((uint64_t)x33 * x47)) + (0x2 * ((uint64_t)x32 * x45))))))))))))));
+ { ℤ x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x62) +ℤ ((0x2 * ((uint64_t)x15 * x63)) + ((0x2 * ((uint64_t)x17 * x61)) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + ((0x2 * ((uint64_t)x23 * x55)) + ((0x2 * ((uint64_t)x25 * x53)) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((0x2 * ((uint64_t)x31 * x47)) + ((0x2 * ((uint64_t)x33 * x45)) + ((uint64_t)x32 * x43))))))))))))));
+ { ℤ x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + ((0x2 * ((uint64_t)x15 * x61)) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + ((0x2 * ((uint64_t)x23 * x53)) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((0x2 * ((uint64_t)x31 * x45)) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ { ℤ x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ { ℤ x79 = (((uint64_t)x5 * x35) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x7 * x62)) +ℤ ((0x2 * ((uint64_t)x9 * x63)) +ℤ ((0x2 * ((uint64_t)x11 * x61)) +ℤ (((uint64_t)x13 * x59) +ℤ ((0x2 * ((uint64_t)x15 * x57)) +ℤ ((0x2 * ((uint64_t)x17 * x55)) +ℤ ((0x2 * ((uint64_t)x19 * x53)) + (((uint64_t)x21 * x51) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + (((uint64_t)x29 * x43) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ { uint64_t x80 = (x79 >> 0x1d);
+ { uint32_t x81 = (x79 & 0x1fffffff);
+ { ℤ x82 = (x80 +ℤ x78);
+ { uint64_t x83 = (x82 >> 0x1c);
+ { uint32_t x84 = (x82 & 0xfffffff);
+ { ℤ x85 = (x83 +ℤ x77);
+ { uint64_t x86 = (x85 >> 0x1c);
+ { uint32_t x87 = (x85 & 0xfffffff);
+ { ℤ x88 = (x86 +ℤ x76);
+ { uint64_t x89 = (x88 >> 0x1c);
+ { uint32_t x90 = (x88 & 0xfffffff);
+ { ℤ x91 = (x89 +ℤ x75);
+ { uint64_t x92 = (x91 >> 0x1d);
+ { uint32_t x93 = (x91 & 0x1fffffff);
+ { ℤ x94 = (x92 +ℤ x74);
+ { uint64_t x95 = (x94 >> 0x1c);
+ { uint32_t x96 = (x94 & 0xfffffff);
+ { ℤ x97 = (x95 +ℤ x73);
+ { uint64_t x98 = (x97 >> 0x1c);
+ { uint32_t x99 = (x97 & 0xfffffff);
+ { ℤ x100 = (x98 +ℤ x72);
+ { uint64_t x101 = (x100 >> 0x1c);
+ { uint32_t x102 = (x100 & 0xfffffff);
+ { ℤ x103 = (x101 +ℤ x71);
+ { uint64_t x104 = (x103 >> 0x1d);
+ { uint32_t x105 = (x103 & 0x1fffffff);
+ { ℤ x106 = (x104 +ℤ x70);
+ { uint64_t x107 = (x106 >> 0x1c);
+ { uint32_t x108 = (x106 & 0xfffffff);
+ { ℤ x109 = (x107 +ℤ x69);
+ { uint64_t x110 = (x109 >> 0x1c);
+ { uint32_t x111 = (x109 & 0xfffffff);
+ { ℤ x112 = (x110 +ℤ x68);
+ { uint64_t x113 = (x112 >> 0x1c);
+ { uint32_t x114 = (x112 & 0xfffffff);
+ { ℤ x115 = (x113 +ℤ x67);
+ { uint64_t x116 = (x115 >> 0x1d);
+ { uint32_t x117 = (x115 & 0x1fffffff);
+ { ℤ x118 = (x116 +ℤ x66);
+ { uint64_t x119 = (x118 >> 0x1c);
+ { uint32_t x120 = (x118 & 0xfffffff);
+ { ℤ x121 = (x119 +ℤ x65);
+ { uint64_t x122 = (x121 >> 0x1c);
+ { uint32_t x123 = (x121 & 0xfffffff);
+ { ℤ x124 = (x122 +ℤ x64);
+ { uint64_t x125 = (x124 >> 0x1c);
+ { uint32_t x126 = (x124 & 0xfffffff);
+ { uint64_t x127 = (x81 + (0x3 * x125));
+ { uint32_t x128 = (uint32_t) (x127 >> 0x1d);
+ { uint32_t x129 = ((uint32_t)x127 & 0x1fffffff);
+ { uint32_t x130 = (x128 + x84);
+ { uint32_t x131 = (x130 >> 0x1c);
+ { uint32_t x132 = (x130 & 0xfffffff);
+ out[0] = x129;
+ out[1] = x132;
+ out[2] = (x131 + x87);
+ out[3] = x90;
+ out[4] = x93;
+ out[5] = x96;
+ out[6] = x99;
+ out[7] = x102;
+ out[8] = x105;
+ out[9] = x108;
+ out[10] = x111;
+ out[11] = x114;
+ out[12] = x117;
+ out[13] = x120;
+ out[14] = x123;
+ out[15] = x126;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e452m3/fesquare.c b/src/Specific/solinas32_2e452m3/fesquare.c
index 31169c7b1..60b1933ab 100644
--- a/src/Specific/solinas32_2e452m3/fesquare.c
+++ b/src/Specific/solinas32_2e452m3/fesquare.c
@@ -1,106 +1,104 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ ℤ x31 = (((uint64_t)x2 * x29) +ℤ ((0x2 * ((uint64_t)x4 * x30)) +ℤ ((0x2 * ((uint64_t)x6 * x28)) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
-{ ℤ x32 = ((((uint64_t)x2 * x30) +ℤ ((0x2 * ((uint64_t)x4 * x28)) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) +ℤ (0x3 * ((uint64_t)x29 * x29)));
-{ ℤ x33 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) +ℤ (0x3 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
-{ ℤ x34 = ((((uint64_t)x2 * x26) +ℤ ((0x2 * ((uint64_t)x4 * x24)) +ℤ ((0x2 * ((uint64_t)x6 * x22)) +ℤ ((0x2 * ((uint64_t)x8 * x20)) +ℤ (((uint64_t)x10 * x18) +ℤ ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x28 * x29)) + ((0x2 * ((uint64_t)x30 * x30)) + (0x2 * ((uint64_t)x29 * x28))))));
-{ ℤ x35 = ((((uint64_t)x2 * x24) +ℤ ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x26 * x29) + ((0x2 * ((uint64_t)x28 * x30)) + ((0x2 * ((uint64_t)x30 * x28)) + ((uint64_t)x29 * x26))))));
-{ ℤ x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + ((0x2 * ((uint64_t)x28 * x28)) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
-{ ℤ x37 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
-{ ℤ x38 = ((((uint64_t)x2 * x18) +ℤ ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x20 * x29)) + ((0x2 * ((uint64_t)x22 * x30)) + ((0x2 * ((uint64_t)x24 * x28)) + (((uint64_t)x26 * x26) + ((0x2 * ((uint64_t)x28 * x24)) + ((0x2 * ((uint64_t)x30 * x22)) + (0x2 * ((uint64_t)x29 * x20))))))))));
-{ ℤ x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) +ℤ (0x3 *ℤ (((uint64_t)x18 * x29) + ((0x2 * ((uint64_t)x20 * x30)) + ((0x2 * ((uint64_t)x22 * x28)) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((0x2 * ((uint64_t)x28 * x22)) + ((0x2 * ((uint64_t)x30 * x20)) + ((uint64_t)x29 * x18))))))))));
-{ ℤ x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (0x3 *ℤ (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + ((0x2 * ((uint64_t)x20 * x28)) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((0x2 * ((uint64_t)x28 * x20)) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
-{ ℤ x41 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 *ℤ (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
-{ ℤ x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x12 * x29)) +ℤ ((0x2 * ((uint64_t)x14 * x30)) +ℤ ((0x2 * ((uint64_t)x16 * x28)) + (((uint64_t)x18 * x26) + ((0x2 * ((uint64_t)x20 * x24)) + ((0x2 * ((uint64_t)x22 * x22)) + ((0x2 * ((uint64_t)x24 * x20)) + (((uint64_t)x26 * x18) + ((0x2 * ((uint64_t)x28 * x16)) + ((0x2 * ((uint64_t)x30 * x14)) + (0x2 * ((uint64_t)x29 * x12))))))))))))));
-{ ℤ x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x29) +ℤ ((0x2 * ((uint64_t)x12 * x30)) + ((0x2 * ((uint64_t)x14 * x28)) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((0x2 * ((uint64_t)x28 * x14)) + ((0x2 * ((uint64_t)x30 * x12)) + ((uint64_t)x29 * x10))))))))))))));
-{ ℤ x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + ((0x2 * ((uint64_t)x12 * x28)) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((0x2 * ((uint64_t)x28 * x12)) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
-{ ℤ x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
-{ ℤ x46 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x4 * x29)) +ℤ ((0x2 * ((uint64_t)x6 * x30)) +ℤ ((0x2 * ((uint64_t)x8 * x28)) +ℤ (((uint64_t)x10 * x26) +ℤ ((0x2 * ((uint64_t)x12 * x24)) +ℤ ((0x2 * ((uint64_t)x14 * x22)) +ℤ ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + (((uint64_t)x26 * x10) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
-{ uint64_t x47 = (x46 >> 0x1d);
-{ uint32_t x48 = (x46 & 0x1fffffff);
-{ ℤ x49 = (x47 +ℤ x45);
-{ uint64_t x50 = (x49 >> 0x1c);
-{ uint32_t x51 = (x49 & 0xfffffff);
-{ ℤ x52 = (x50 +ℤ x44);
-{ uint64_t x53 = (x52 >> 0x1c);
-{ uint32_t x54 = (x52 & 0xfffffff);
-{ ℤ x55 = (x53 +ℤ x43);
-{ uint64_t x56 = (x55 >> 0x1c);
-{ uint32_t x57 = (x55 & 0xfffffff);
-{ ℤ x58 = (x56 +ℤ x42);
-{ uint64_t x59 = (x58 >> 0x1d);
-{ uint32_t x60 = (x58 & 0x1fffffff);
-{ ℤ x61 = (x59 +ℤ x41);
-{ uint64_t x62 = (x61 >> 0x1c);
-{ uint32_t x63 = (x61 & 0xfffffff);
-{ ℤ x64 = (x62 +ℤ x40);
-{ uint64_t x65 = (x64 >> 0x1c);
-{ uint32_t x66 = (x64 & 0xfffffff);
-{ ℤ x67 = (x65 +ℤ x39);
-{ uint64_t x68 = (x67 >> 0x1c);
-{ uint32_t x69 = (x67 & 0xfffffff);
-{ ℤ x70 = (x68 +ℤ x38);
-{ uint64_t x71 = (x70 >> 0x1d);
-{ uint32_t x72 = (x70 & 0x1fffffff);
-{ ℤ x73 = (x71 +ℤ x37);
-{ uint64_t x74 = (x73 >> 0x1c);
-{ uint32_t x75 = (x73 & 0xfffffff);
-{ ℤ x76 = (x74 +ℤ x36);
-{ uint64_t x77 = (x76 >> 0x1c);
-{ uint32_t x78 = (x76 & 0xfffffff);
-{ ℤ x79 = (x77 +ℤ x35);
-{ uint64_t x80 = (x79 >> 0x1c);
-{ uint32_t x81 = (x79 & 0xfffffff);
-{ ℤ x82 = (x80 +ℤ x34);
-{ uint64_t x83 = (x82 >> 0x1d);
-{ uint32_t x84 = (x82 & 0x1fffffff);
-{ ℤ x85 = (x83 +ℤ x33);
-{ uint64_t x86 = (x85 >> 0x1c);
-{ uint32_t x87 = (x85 & 0xfffffff);
-{ ℤ x88 = (x86 +ℤ x32);
-{ uint64_t x89 = (x88 >> 0x1c);
-{ uint32_t x90 = (x88 & 0xfffffff);
-{ ℤ x91 = (x89 +ℤ x31);
-{ uint64_t x92 = (x91 >> 0x1c);
-{ uint32_t x93 = (x91 & 0xfffffff);
-{ uint64_t x94 = (x48 + (0x3 * x92));
-{ uint32_t x95 = (uint32_t) (x94 >> 0x1d);
-{ uint32_t x96 = ((uint32_t)x94 & 0x1fffffff);
-{ uint32_t x97 = (x95 + x51);
-{ uint32_t x98 = (x97 >> 0x1c);
-{ uint32_t x99 = (x97 & 0xfffffff);
-out[0] = x93;
-out[1] = x90;
-out[2] = x87;
-out[3] = x84;
-out[4] = x81;
-out[5] = x78;
-out[6] = x75;
-out[7] = x72;
-out[8] = x69;
-out[9] = x66;
-out[10] = x63;
-out[11] = x60;
-out[12] = x57;
-out[13] = x98 + x54;
-out[14] = x99;
-out[15] = x96;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) { /* Squares the 16-limb value in1 and writes 16 carried limbs to out; the path (solinas32_2e452m3) and the 0x3 fold below suggest arithmetic mod 2^452 - 3 -- TODO confirm. */
+ { const uint32_t x29 = in1[15]; /* names are not in index order: x29 is limb 15, x30 is limb 14 */
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0]; /* limb 0; its square seeds coefficient 0 (x46) */
+ { ℤ x31 = (((uint64_t)x2 * x29) +ℤ ((0x2 * ((uint64_t)x4 * x30)) +ℤ ((0x2 * ((uint64_t)x6 * x28)) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2)))))))))))))))); /* coefficient 15: products of limb pairs whose indices sum to 15. The ℤ marker is not a C type; it looks like the generator's placeholder for a value with no fixed-width type assigned -- TODO confirm */
+ { ℤ x32 = ((((uint64_t)x2 * x30) +ℤ ((0x2 * ((uint64_t)x4 * x28)) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) +ℤ (0x3 * ((uint64_t)x29 * x29))); /* coefficient 14, plus 0x3 times the one pair whose indices sum to 30 */
+ { ℤ x33 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) +ℤ (0x3 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ { ℤ x34 = ((((uint64_t)x2 * x26) +ℤ ((0x2 * ((uint64_t)x4 * x24)) +ℤ ((0x2 * ((uint64_t)x6 * x22)) +ℤ ((0x2 * ((uint64_t)x8 * x20)) +ℤ (((uint64_t)x10 * x18) +ℤ ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x28 * x29)) + ((0x2 * ((uint64_t)x30 * x30)) + (0x2 * ((uint64_t)x29 * x28))))));
+ { ℤ x35 = ((((uint64_t)x2 * x24) +ℤ ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x26 * x29) + ((0x2 * ((uint64_t)x28 * x30)) + ((0x2 * ((uint64_t)x30 * x28)) + ((uint64_t)x29 * x26))))));
+ { ℤ x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + ((0x2 * ((uint64_t)x28 * x28)) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ { ℤ x37 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ { ℤ x38 = ((((uint64_t)x2 * x18) +ℤ ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x20 * x29)) + ((0x2 * ((uint64_t)x22 * x30)) + ((0x2 * ((uint64_t)x24 * x28)) + (((uint64_t)x26 * x26) + ((0x2 * ((uint64_t)x28 * x24)) + ((0x2 * ((uint64_t)x30 * x22)) + (0x2 * ((uint64_t)x29 * x20))))))))));
+ { ℤ x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) +ℤ (0x3 *ℤ (((uint64_t)x18 * x29) + ((0x2 * ((uint64_t)x20 * x30)) + ((0x2 * ((uint64_t)x22 * x28)) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((0x2 * ((uint64_t)x28 * x22)) + ((0x2 * ((uint64_t)x30 * x20)) + ((uint64_t)x29 * x18))))))))));
+ { ℤ x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (0x3 *ℤ (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + ((0x2 * ((uint64_t)x20 * x28)) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((0x2 * ((uint64_t)x28 * x20)) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ { ℤ x41 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 *ℤ (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ { ℤ x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x12 * x29)) +ℤ ((0x2 * ((uint64_t)x14 * x30)) +ℤ ((0x2 * ((uint64_t)x16 * x28)) + (((uint64_t)x18 * x26) + ((0x2 * ((uint64_t)x20 * x24)) + ((0x2 * ((uint64_t)x22 * x22)) + ((0x2 * ((uint64_t)x24 * x20)) + (((uint64_t)x26 * x18) + ((0x2 * ((uint64_t)x28 * x16)) + ((0x2 * ((uint64_t)x30 * x14)) + (0x2 * ((uint64_t)x29 * x12))))))))))))));
+ { ℤ x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x29) +ℤ ((0x2 * ((uint64_t)x12 * x30)) + ((0x2 * ((uint64_t)x14 * x28)) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((0x2 * ((uint64_t)x28 * x14)) + ((0x2 * ((uint64_t)x30 * x12)) + ((uint64_t)x29 * x10))))))))))))));
+ { ℤ x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + ((0x2 * ((uint64_t)x12 * x28)) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((0x2 * ((uint64_t)x28 * x12)) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ { ℤ x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ { ℤ x46 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x4 * x29)) +ℤ ((0x2 * ((uint64_t)x6 * x30)) +ℤ ((0x2 * ((uint64_t)x8 * x28)) +ℤ (((uint64_t)x10 * x26) +ℤ ((0x2 * ((uint64_t)x12 * x24)) +ℤ ((0x2 * ((uint64_t)x14 * x22)) +ℤ ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + (((uint64_t)x26 * x10) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4)))))))))))))))))); /* coefficient 0: x2*x2 plus 0x3 times the pairs whose indices sum to 16. NOTE(review): the 0x2 factors presumably compensate for the mixed 29/28-bit limb widths -- confirm */
+ { uint64_t x47 = (x46 >> 0x1d); /* carry out of coefficient 0: everything above the low 29 bits */
+ { uint32_t x48 = (x46 & 0x1fffffff); /* low 29 bits of coefficient 0 */
+ { ℤ x49 = (x47 +ℤ x45); /* coefficient 1 plus the incoming carry */
+ { uint64_t x50 = (x49 >> 0x1c); /* 28-bit split here: shifts are 0x1d at coefficients 0, 4, 8, 12 and 0x1c at the others */
+ { uint32_t x51 = (x49 & 0xfffffff);
+ { ℤ x52 = (x50 +ℤ x44);
+ { uint64_t x53 = (x52 >> 0x1c);
+ { uint32_t x54 = (x52 & 0xfffffff);
+ { ℤ x55 = (x53 +ℤ x43);
+ { uint64_t x56 = (x55 >> 0x1c);
+ { uint32_t x57 = (x55 & 0xfffffff);
+ { ℤ x58 = (x56 +ℤ x42);
+ { uint64_t x59 = (x58 >> 0x1d);
+ { uint32_t x60 = (x58 & 0x1fffffff);
+ { ℤ x61 = (x59 +ℤ x41);
+ { uint64_t x62 = (x61 >> 0x1c);
+ { uint32_t x63 = (x61 & 0xfffffff);
+ { ℤ x64 = (x62 +ℤ x40);
+ { uint64_t x65 = (x64 >> 0x1c);
+ { uint32_t x66 = (x64 & 0xfffffff);
+ { ℤ x67 = (x65 +ℤ x39);
+ { uint64_t x68 = (x67 >> 0x1c);
+ { uint32_t x69 = (x67 & 0xfffffff);
+ { ℤ x70 = (x68 +ℤ x38);
+ { uint64_t x71 = (x70 >> 0x1d);
+ { uint32_t x72 = (x70 & 0x1fffffff);
+ { ℤ x73 = (x71 +ℤ x37);
+ { uint64_t x74 = (x73 >> 0x1c);
+ { uint32_t x75 = (x73 & 0xfffffff);
+ { ℤ x76 = (x74 +ℤ x36);
+ { uint64_t x77 = (x76 >> 0x1c);
+ { uint32_t x78 = (x76 & 0xfffffff);
+ { ℤ x79 = (x77 +ℤ x35);
+ { uint64_t x80 = (x79 >> 0x1c);
+ { uint32_t x81 = (x79 & 0xfffffff);
+ { ℤ x82 = (x80 +ℤ x34);
+ { uint64_t x83 = (x82 >> 0x1d);
+ { uint32_t x84 = (x82 & 0x1fffffff);
+ { ℤ x85 = (x83 +ℤ x33);
+ { uint64_t x86 = (x85 >> 0x1c);
+ { uint32_t x87 = (x85 & 0xfffffff);
+ { ℤ x88 = (x86 +ℤ x32);
+ { uint64_t x89 = (x88 >> 0x1c);
+ { uint32_t x90 = (x88 & 0xfffffff);
+ { ℤ x91 = (x89 +ℤ x31);
+ { uint64_t x92 = (x91 >> 0x1c); /* carry out of the top coefficient (15) */
+ { uint32_t x93 = (x91 & 0xfffffff);
+ { uint64_t x94 = (x48 + (0x3 * x92)); /* fold that carry back into limb 0, scaled by 0x3 */
+ { uint32_t x95 = (uint32_t) (x94 >> 0x1d); /* carry out of the folded limb 0 */
+ { uint32_t x96 = ((uint32_t)x94 & 0x1fffffff);
+ { uint32_t x97 = (x95 + x51); /* propagate it into limb 1 */
+ { uint32_t x98 = (x97 >> 0x1c);
+ { uint32_t x99 = (x97 & 0xfffffff);
+ out[0] = x96; /* limbs are stored in index order, limb 0 first */
+ out[1] = x99;
+ out[2] = (x98 + x54); /* carry from limb 1 is added to limb 2 with no further mask */
+ out[3] = x57;
+ out[4] = x60;
+ out[5] = x63;
+ out[6] = x66;
+ out[7] = x69;
+ out[8] = x72;
+ out[9] = x75;
+ out[10] = x78;
+ out[11] = x81;
+ out[12] = x84;
+ out[13] = x87;
+ out[14] = x90;
+ out[15] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e452m3/freeze.c b/src/Specific/solinas32_2e452m3/freeze.c
index ddf7dd7c3..aefe4819b 100644
--- a/src/Specific/solinas32_2e452m3/freeze.c
+++ b/src/Specific/solinas32_2e452m3/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffffd;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) { // Reads 16 limbs from in1, subtracts a fixed 16-limb constant with borrow propagation, adds the constant back under a mask derived from the final borrow, and writes 16 limbs to out.
+ { const uint32_t x29 = in1[15]; // Limbs are loaded from index 15 down to index 0.
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffd); // First pass: subtract-with-borrow chain starting at in1[0] with borrow-in 0; each step's second result (here x33) is the borrow-in of the next step. NOTE(review): the two-variable binding and `Op (Syntax....)` are not C syntax; this looks like an unlowered operation from the generator -- confirm how it is meant to compile. The constants (0x1ffffffd, then 0xfffffff / 0x1fffffff) presumably are the limbs of 2^452 - 3 with 29/28/28/28-bit limbs -- TODO confirm.
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xfffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xfffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xfffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0x1fffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xfffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xfffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xfffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0x1fffffff);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xfffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xfffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xfffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0x1fffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xfffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xfffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xfffffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff); // x79 is chosen from the final borrow x78; presumably 0 when x78 is zero and 0xffffffff otherwise (cmovznz is defined outside this file) -- TODO confirm.
+ { uint32_t x80 = (x79 & 0x1ffffffd); // Each constant from the first pass is ANDed with x79 before being added back.
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80); // Second pass: add-with-carry chain that adds the masked constant to the first-pass result, limb by limb, with carry-in 0 for the first limb.
+ { uint32_t x84 = (x79 & 0xfffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0xfffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0xfffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0x1fffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0xfffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0xfffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0xfffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0x1fffffff);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0xfffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0xfffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0xfffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0x1fffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0xfffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0xfffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0xfffffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140); // The carry out of the last limb is bound to `_` and never read.
+ out[0] = x82; // Store the second-pass limbs in order; out[0] is the limb derived from in1[0].
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e480m2e240m1/femul.c b/src/Specific/solinas32_2e480m2e240m1/femul.c
index 2fda72b70..902c06227 100644
--- a/src/Specific/solinas32_2e480m2e240m1/femul.c
+++ b/src/Specific/solinas32_2e480m2e240m1/femul.c
@@ -1,131 +1,145 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ ℤ x64 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x19 + x32) *ℤ ((uint64_t)x49 + x62)) -ℤ ((uint64_t)x19 * x49)), ((((uint64_t)x19 * x62) +ℤ ((uint64_t)x32 * x49)) +ℤ ((uint64_t)x32 * x62)));
-{ ℤ x65 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x17 + x33) *ℤ ((uint64_t)x49 + x62)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x47 + x63))) -ℤ (((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47))), (((((uint64_t)x17 * x62) +ℤ ((uint64_t)x19 * x63)) +ℤ (((uint64_t)x33 * x49) +ℤ ((uint64_t)x32 * x47))) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))));
-{ ℤ x66 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x15 + x31) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x47 + x63)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x45 + x61)))) -ℤ (((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45)))), (((((uint64_t)x15 * x62) +ℤ (((uint64_t)x17 * x63) +ℤ ((uint64_t)x19 * x61))) +ℤ (((uint64_t)x31 * x49) +ℤ (((uint64_t)x33 * x47) +ℤ ((uint64_t)x32 * x45)))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))));
-{ ℤ x67 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x13 + x29) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x45 + x61)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x43 + x59))))) -ℤ (((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43))))), (((((uint64_t)x13 * x62) +ℤ (((uint64_t)x15 * x63) +ℤ (((uint64_t)x17 * x61) +ℤ ((uint64_t)x19 * x59)))) +ℤ (((uint64_t)x29 * x49) +ℤ (((uint64_t)x31 * x47) +ℤ (((uint64_t)x33 * x45) +ℤ ((uint64_t)x32 * x43))))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))));
-{ ℤ x68 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x11 + x27) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x43 + x59)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x41 + x57)))))) -ℤ (((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41)))))), (((((uint64_t)x11 * x62) +ℤ (((uint64_t)x13 * x63) +ℤ (((uint64_t)x15 * x61) +ℤ (((uint64_t)x17 * x59) +ℤ ((uint64_t)x19 * x57))))) +ℤ (((uint64_t)x27 * x49) +ℤ (((uint64_t)x29 * x47) +ℤ (((uint64_t)x31 * x45) +ℤ (((uint64_t)x33 * x43) +ℤ ((uint64_t)x32 * x41)))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))));
-{ ℤ x69 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x9 + x25) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x41 + x57)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) +ℤ (((uint64_t)x11 * x63) +ℤ (((uint64_t)x13 * x61) +ℤ (((uint64_t)x15 * x59) +ℤ (((uint64_t)x17 * x57) +ℤ ((uint64_t)x19 * x55)))))) +ℤ (((uint64_t)x25 * x49) +ℤ (((uint64_t)x27 * x47) +ℤ (((uint64_t)x29 * x45) +ℤ (((uint64_t)x31 * x43) +ℤ (((uint64_t)x33 * x41) +ℤ ((uint64_t)x32 * x39))))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))));
-{ ℤ x70 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x7 + x23) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x39 + x55)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) +ℤ (((uint64_t)x9 * x63) +ℤ (((uint64_t)x11 * x61) +ℤ (((uint64_t)x13 * x59) +ℤ (((uint64_t)x15 * x57) +ℤ (((uint64_t)x17 * x55) +ℤ ((uint64_t)x19 * x53))))))) +ℤ (((uint64_t)x23 * x49) +ℤ (((uint64_t)x25 * x47) +ℤ (((uint64_t)x27 * x45) +ℤ (((uint64_t)x29 * x43) +ℤ (((uint64_t)x31 * x41) +ℤ (((uint64_t)x33 * x39) +ℤ ((uint64_t)x32 * x37)))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))));
-{ ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) +ℤ (((uint64_t)x7 * x63) +ℤ (((uint64_t)x9 * x61) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ (((uint64_t)x15 * x55) +ℤ (((uint64_t)x17 * x53) +ℤ ((uint64_t)x19 * x51)))))))) +ℤ (((uint64_t)x21 * x49) +ℤ (((uint64_t)x23 * x47) +ℤ (((uint64_t)x25 * x45) +ℤ (((uint64_t)x27 * x43) +ℤ (((uint64_t)x29 * x41) +ℤ (((uint64_t)x31 * x39) +ℤ (((uint64_t)x33 * x37) +ℤ ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51))))))))));
-{ ℤ x72 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x17 + x33) *ℤ ((uint64_t)x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) +ℤ (((uint64_t)x7 * x61) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) +ℤ (((uint64_t)x13 * x55) +ℤ (((uint64_t)x15 * x53) +ℤ ((uint64_t)x17 * x51))))))) +ℤ (((uint64_t)x21 * x47) +ℤ (((uint64_t)x23 * x45) +ℤ (((uint64_t)x25 * x43) +ℤ (((uint64_t)x27 * x41) +ℤ (((uint64_t)x29 * x39) +ℤ (((uint64_t)x31 * x37) +ℤ ((uint64_t)x33 * x35)))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))));
-{ ℤ x73 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x15 + x31) *ℤ ((uint64_t)x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) +ℤ (((uint64_t)x7 * x59) +ℤ (((uint64_t)x9 * x57) +ℤ (((uint64_t)x11 * x55) +ℤ (((uint64_t)x13 * x53) +ℤ ((uint64_t)x15 * x51)))))) +ℤ (((uint64_t)x21 * x45) +ℤ (((uint64_t)x23 * x43) +ℤ (((uint64_t)x25 * x41) +ℤ (((uint64_t)x27 * x39) +ℤ (((uint64_t)x29 * x37) +ℤ ((uint64_t)x31 * x35))))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))));
-{ ℤ x74 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x13 + x29) *ℤ ((uint64_t)x35 + x51)))))) -ℤ (((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35)))))), (((((uint64_t)x5 * x59) +ℤ (((uint64_t)x7 * x57) +ℤ (((uint64_t)x9 * x55) +ℤ (((uint64_t)x11 * x53) +ℤ ((uint64_t)x13 * x51))))) +ℤ (((uint64_t)x21 * x43) +ℤ (((uint64_t)x23 * x41) +ℤ (((uint64_t)x25 * x39) +ℤ (((uint64_t)x27 * x37) +ℤ ((uint64_t)x29 * x35)))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))));
-{ ℤ x75 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x11 + x27) *ℤ ((uint64_t)x35 + x51))))) -ℤ (((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35))))), (((((uint64_t)x5 * x57) +ℤ (((uint64_t)x7 * x55) +ℤ (((uint64_t)x9 * x53) +ℤ ((uint64_t)x11 * x51)))) +ℤ (((uint64_t)x21 * x41) +ℤ (((uint64_t)x23 * x39) +ℤ (((uint64_t)x25 * x37) +ℤ ((uint64_t)x27 * x35))))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))));
-{ ℤ x76 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x9 + x25) *ℤ ((uint64_t)x35 + x51)))) -ℤ (((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35)))), (((((uint64_t)x5 * x55) +ℤ (((uint64_t)x7 * x53) +ℤ ((uint64_t)x9 * x51))) +ℤ (((uint64_t)x21 * x39) +ℤ (((uint64_t)x23 * x37) +ℤ ((uint64_t)x25 * x35)))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))));
-{ ℤ x77 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x7 + x23) *ℤ ((uint64_t)x35 + x51))) -ℤ (((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35))), (((((uint64_t)x5 * x53) +ℤ ((uint64_t)x7 * x51)) +ℤ (((uint64_t)x21 * x37) +ℤ ((uint64_t)x23 * x35))) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))));
-{ ℤ x78 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x5 + x21) *ℤ ((uint64_t)x35 + x51)) -ℤ ((uint64_t)x5 * x35)), ((((uint64_t)x5 * x51) +ℤ ((uint64_t)x21 * x35)) +ℤ ((uint64_t)x21 * x51)));
-{ ℤ x79 = (((((uint64_t)x19 * x49) +ℤ ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64);
-{ ℤ x80 = ((((((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47)) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))) +ℤ x73) +ℤ x65);
-{ ℤ x81 = ((((((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))) +ℤ x74) +ℤ x66);
-{ ℤ x82 = ((((((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43)))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))) +ℤ x75) +ℤ x67);
-{ ℤ x83 = ((((((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))) +ℤ x76) +ℤ x68);
-{ ℤ x84 = ((((((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39)))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))) +ℤ x77) +ℤ x69);
-{ ℤ x85 = ((((((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))) +ℤ x78) +ℤ x70);
-{ ℤ x86 = ((((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35)))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51)))))))));
-{ ℤ x87 = (((((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))) +ℤ x64);
-{ ℤ x88 = (((((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35)))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))) +ℤ x65);
-{ ℤ x89 = (((((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))) +ℤ x66);
-{ ℤ x90 = (((((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35)))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))) +ℤ x67);
-{ ℤ x91 = (((((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))) +ℤ x68);
-{ ℤ x92 = (((((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35)) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))) +ℤ x69);
-{ ℤ x93 = ((((uint64_t)x5 * x35) +ℤ ((uint64_t)x21 * x51)) +ℤ x70);
-{ uint64_t x94 = (x86 >> 0x1e);
-{ uint32_t x95 = (x86 & 0x3fffffff);
-{ uint64_t x96 = (x71 >> 0x1e);
-{ uint32_t x97 = (x71 & 0x3fffffff);
-{ ℤ x98 = ((0x40000000 *ℤ x96) +ℤ x97);
-{ uint64_t x99 = (x98 >> 0x1e);
-{ uint32_t x100 = (x98 & 0x3fffffff);
-{ ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
-{ uint64_t x102 = (x101 >> 0x1e);
-{ uint32_t x103 = (x101 & 0x3fffffff);
-{ ℤ x104 = (x93 +ℤ x99);
-{ uint64_t x105 = (x104 >> 0x1e);
-{ uint32_t x106 = (x104 & 0x3fffffff);
-{ ℤ x107 = (x102 +ℤ x84);
-{ uint64_t x108 = (x107 >> 0x1e);
-{ uint32_t x109 = (x107 & 0x3fffffff);
-{ ℤ x110 = (x105 +ℤ x92);
-{ uint64_t x111 = (x110 >> 0x1e);
-{ uint32_t x112 = (x110 & 0x3fffffff);
-{ ℤ x113 = (x108 +ℤ x83);
-{ uint64_t x114 = (x113 >> 0x1e);
-{ uint32_t x115 = (x113 & 0x3fffffff);
-{ ℤ x116 = (x111 +ℤ x91);
-{ uint64_t x117 = (x116 >> 0x1e);
-{ uint32_t x118 = (x116 & 0x3fffffff);
-{ ℤ x119 = (x114 +ℤ x82);
-{ uint64_t x120 = (x119 >> 0x1e);
-{ uint32_t x121 = (x119 & 0x3fffffff);
-{ ℤ x122 = (x117 +ℤ x90);
-{ uint64_t x123 = (x122 >> 0x1e);
-{ uint32_t x124 = (x122 & 0x3fffffff);
-{ ℤ x125 = (x120 +ℤ x81);
-{ uint64_t x126 = (x125 >> 0x1e);
-{ uint32_t x127 = (x125 & 0x3fffffff);
-{ ℤ x128 = (x123 +ℤ x89);
-{ uint64_t x129 = (x128 >> 0x1e);
-{ uint32_t x130 = (x128 & 0x3fffffff);
-{ ℤ x131 = (x126 +ℤ x80);
-{ uint64_t x132 = (x131 >> 0x1e);
-{ uint32_t x133 = (x131 & 0x3fffffff);
-{ ℤ x134 = (x129 +ℤ x88);
-{ uint64_t x135 = (x134 >> 0x1e);
-{ uint32_t x136 = (x134 & 0x3fffffff);
-{ ℤ x137 = (x132 +ℤ x79);
-{ uint64_t x138 = (x137 >> 0x1e);
-{ uint32_t x139 = (x137 & 0x3fffffff);
-{ ℤ x140 = (x135 +ℤ x87);
-{ uint64_t x141 = (x140 >> 0x1e);
-{ uint32_t x142 = (x140 & 0x3fffffff);
-{ uint64_t x143 = (x138 + x100);
-{ uint32_t x144 = (uint32_t) (x143 >> 0x1e);
-{ uint32_t x145 = ((uint32_t)x143 & 0x3fffffff);
-{ uint64_t x146 = (x141 + x95);
-{ uint32_t x147 = (uint32_t) (x146 >> 0x1e);
-{ uint32_t x148 = ((uint32_t)x146 & 0x3fffffff);
-{ uint64_t x149 = (((uint64_t)0x40000000 * x144) + x145);
-{ uint32_t x150 = (uint32_t) (x149 >> 0x1e);
-{ uint32_t x151 = ((uint32_t)x149 & 0x3fffffff);
-{ uint32_t x152 = ((x147 + x103) + x150);
-{ uint32_t x153 = (x152 >> 0x1e);
-{ uint32_t x154 = (x152 & 0x3fffffff);
-{ uint32_t x155 = (x106 + x150);
-{ uint32_t x156 = (x155 >> 0x1e);
-{ uint32_t x157 = (x155 & 0x3fffffff);
-out[0] = x151;
-out[1] = x139;
-out[2] = x133;
-out[3] = x127;
-out[4] = x121;
-out[5] = x115;
-out[6] = x153 + x109;
-out[7] = x154;
-out[8] = x148;
-out[9] = x142;
-out[10] = x136;
-out[11] = x130;
-out[12] = x124;
-out[13] = x118;
-out[14] = x156 + x112;
-out[15] = x157;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint32_t out[16], const uint32_t in1[16], const uint32_t in2[16]) {
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x33 = in1[14];
+ { const uint32_t x31 = in1[13];
+ { const uint32_t x29 = in1[12];
+ { const uint32_t x27 = in1[11];
+ { const uint32_t x25 = in1[10];
+ { const uint32_t x23 = in1[9];
+ { const uint32_t x21 = in1[8];
+ { const uint32_t x19 = in1[7];
+ { const uint32_t x17 = in1[6];
+ { const uint32_t x15 = in1[5];
+ { const uint32_t x13 = in1[4];
+ { const uint32_t x11 = in1[3];
+ { const uint32_t x9 = in1[2];
+ { const uint32_t x7 = in1[1];
+ { const uint32_t x5 = in1[0];
+ { const uint32_t x62 = in2[15];
+ { const uint32_t x63 = in2[14];
+ { const uint32_t x61 = in2[13];
+ { const uint32_t x59 = in2[12];
+ { const uint32_t x57 = in2[11];
+ { const uint32_t x55 = in2[10];
+ { const uint32_t x53 = in2[9];
+ { const uint32_t x51 = in2[8];
+ { const uint32_t x49 = in2[7];
+ { const uint32_t x47 = in2[6];
+ { const uint32_t x45 = in2[5];
+ { const uint32_t x43 = in2[4];
+ { const uint32_t x41 = in2[3];
+ { const uint32_t x39 = in2[2];
+ { const uint32_t x37 = in2[1];
+ { const uint32_t x35 = in2[0];
+ { ℤ x64 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x19 + x32) *ℤ ((uint64_t)x49 + x62)) -ℤ ((uint64_t)x19 * x49)), ((((uint64_t)x19 * x62) +ℤ ((uint64_t)x32 * x49)) +ℤ ((uint64_t)x32 * x62)));
+ { ℤ x65 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x17 + x33) *ℤ ((uint64_t)x49 + x62)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x47 + x63))) -ℤ (((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47))), (((((uint64_t)x17 * x62) +ℤ ((uint64_t)x19 * x63)) +ℤ (((uint64_t)x33 * x49) +ℤ ((uint64_t)x32 * x47))) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))));
+ { ℤ x66 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x15 + x31) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x47 + x63)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x45 + x61)))) -ℤ (((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45)))), (((((uint64_t)x15 * x62) +ℤ (((uint64_t)x17 * x63) +ℤ ((uint64_t)x19 * x61))) +ℤ (((uint64_t)x31 * x49) +ℤ (((uint64_t)x33 * x47) +ℤ ((uint64_t)x32 * x45)))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))));
+ { ℤ x67 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x13 + x29) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x45 + x61)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x43 + x59))))) -ℤ (((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43))))), (((((uint64_t)x13 * x62) +ℤ (((uint64_t)x15 * x63) +ℤ (((uint64_t)x17 * x61) +ℤ ((uint64_t)x19 * x59)))) +ℤ (((uint64_t)x29 * x49) +ℤ (((uint64_t)x31 * x47) +ℤ (((uint64_t)x33 * x45) +ℤ ((uint64_t)x32 * x43))))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))));
+ { ℤ x68 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x11 + x27) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x43 + x59)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x41 + x57)))))) -ℤ (((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41)))))), (((((uint64_t)x11 * x62) +ℤ (((uint64_t)x13 * x63) +ℤ (((uint64_t)x15 * x61) +ℤ (((uint64_t)x17 * x59) +ℤ ((uint64_t)x19 * x57))))) +ℤ (((uint64_t)x27 * x49) +ℤ (((uint64_t)x29 * x47) +ℤ (((uint64_t)x31 * x45) +ℤ (((uint64_t)x33 * x43) +ℤ ((uint64_t)x32 * x41)))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))));
+ { ℤ x69 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x9 + x25) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x41 + x57)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) +ℤ (((uint64_t)x11 * x63) +ℤ (((uint64_t)x13 * x61) +ℤ (((uint64_t)x15 * x59) +ℤ (((uint64_t)x17 * x57) +ℤ ((uint64_t)x19 * x55)))))) +ℤ (((uint64_t)x25 * x49) +ℤ (((uint64_t)x27 * x47) +ℤ (((uint64_t)x29 * x45) +ℤ (((uint64_t)x31 * x43) +ℤ (((uint64_t)x33 * x41) +ℤ ((uint64_t)x32 * x39))))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))));
+ { ℤ x70 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x7 + x23) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x39 + x55)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) +ℤ (((uint64_t)x9 * x63) +ℤ (((uint64_t)x11 * x61) +ℤ (((uint64_t)x13 * x59) +ℤ (((uint64_t)x15 * x57) +ℤ (((uint64_t)x17 * x55) +ℤ ((uint64_t)x19 * x53))))))) +ℤ (((uint64_t)x23 * x49) +ℤ (((uint64_t)x25 * x47) +ℤ (((uint64_t)x27 * x45) +ℤ (((uint64_t)x29 * x43) +ℤ (((uint64_t)x31 * x41) +ℤ (((uint64_t)x33 * x39) +ℤ ((uint64_t)x32 * x37)))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))));
+ { ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) +ℤ (((uint64_t)x7 * x63) +ℤ (((uint64_t)x9 * x61) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ (((uint64_t)x15 * x55) +ℤ (((uint64_t)x17 * x53) +ℤ ((uint64_t)x19 * x51)))))))) +ℤ (((uint64_t)x21 * x49) +ℤ (((uint64_t)x23 * x47) +ℤ (((uint64_t)x25 * x45) +ℤ (((uint64_t)x27 * x43) +ℤ (((uint64_t)x29 * x41) +ℤ (((uint64_t)x31 * x39) +ℤ (((uint64_t)x33 * x37) +ℤ ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51))))))))));
+ { ℤ x72 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x17 + x33) *ℤ ((uint64_t)x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) +ℤ (((uint64_t)x7 * x61) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) +ℤ (((uint64_t)x13 * x55) +ℤ (((uint64_t)x15 * x53) +ℤ ((uint64_t)x17 * x51))))))) +ℤ (((uint64_t)x21 * x47) +ℤ (((uint64_t)x23 * x45) +ℤ (((uint64_t)x25 * x43) +ℤ (((uint64_t)x27 * x41) +ℤ (((uint64_t)x29 * x39) +ℤ (((uint64_t)x31 * x37) +ℤ ((uint64_t)x33 * x35)))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))));
+ { ℤ x73 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x15 + x31) *ℤ ((uint64_t)x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) +ℤ (((uint64_t)x7 * x59) +ℤ (((uint64_t)x9 * x57) +ℤ (((uint64_t)x11 * x55) +ℤ (((uint64_t)x13 * x53) +ℤ ((uint64_t)x15 * x51)))))) +ℤ (((uint64_t)x21 * x45) +ℤ (((uint64_t)x23 * x43) +ℤ (((uint64_t)x25 * x41) +ℤ (((uint64_t)x27 * x39) +ℤ (((uint64_t)x29 * x37) +ℤ ((uint64_t)x31 * x35))))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))));
+ { ℤ x74 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x13 + x29) *ℤ ((uint64_t)x35 + x51)))))) -ℤ (((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35)))))), (((((uint64_t)x5 * x59) +ℤ (((uint64_t)x7 * x57) +ℤ (((uint64_t)x9 * x55) +ℤ (((uint64_t)x11 * x53) +ℤ ((uint64_t)x13 * x51))))) +ℤ (((uint64_t)x21 * x43) +ℤ (((uint64_t)x23 * x41) +ℤ (((uint64_t)x25 * x39) +ℤ (((uint64_t)x27 * x37) +ℤ ((uint64_t)x29 * x35)))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))));
+ { ℤ x75 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x11 + x27) *ℤ ((uint64_t)x35 + x51))))) -ℤ (((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35))))), (((((uint64_t)x5 * x57) +ℤ (((uint64_t)x7 * x55) +ℤ (((uint64_t)x9 * x53) +ℤ ((uint64_t)x11 * x51)))) +ℤ (((uint64_t)x21 * x41) +ℤ (((uint64_t)x23 * x39) +ℤ (((uint64_t)x25 * x37) +ℤ ((uint64_t)x27 * x35))))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))));
+ { ℤ x76 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x9 + x25) *ℤ ((uint64_t)x35 + x51)))) -ℤ (((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35)))), (((((uint64_t)x5 * x55) +ℤ (((uint64_t)x7 * x53) +ℤ ((uint64_t)x9 * x51))) +ℤ (((uint64_t)x21 * x39) +ℤ (((uint64_t)x23 * x37) +ℤ ((uint64_t)x25 * x35)))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))));
+ { ℤ x77 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x7 + x23) *ℤ ((uint64_t)x35 + x51))) -ℤ (((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35))), (((((uint64_t)x5 * x53) +ℤ ((uint64_t)x7 * x51)) +ℤ (((uint64_t)x21 * x37) +ℤ ((uint64_t)x23 * x35))) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))));
+ { ℤ x78 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x5 + x21) *ℤ ((uint64_t)x35 + x51)) -ℤ ((uint64_t)x5 * x35)), ((((uint64_t)x5 * x51) +ℤ ((uint64_t)x21 * x35)) +ℤ ((uint64_t)x21 * x51)));
+ { ℤ x79 = (((((uint64_t)x19 * x49) +ℤ ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64);
+ { ℤ x80 = ((((((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47)) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))) +ℤ x73) +ℤ x65);
+ { ℤ x81 = ((((((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))) +ℤ x74) +ℤ x66);
+ { ℤ x82 = ((((((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43)))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))) +ℤ x75) +ℤ x67);
+ { ℤ x83 = ((((((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))) +ℤ x76) +ℤ x68);
+ { ℤ x84 = ((((((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39)))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))) +ℤ x77) +ℤ x69);
+ { ℤ x85 = ((((((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))) +ℤ x78) +ℤ x70);
+ { ℤ x86 = ((((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35)))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51)))))))));
+ { ℤ x87 = (((((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))) +ℤ x64);
+ { ℤ x88 = (((((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35)))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))) +ℤ x65);
+ { ℤ x89 = (((((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))) +ℤ x66);
+ { ℤ x90 = (((((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35)))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))) +ℤ x67);
+ { ℤ x91 = (((((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))) +ℤ x68);
+ { ℤ x92 = (((((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35)) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))) +ℤ x69);
+ { ℤ x93 = ((((uint64_t)x5 * x35) +ℤ ((uint64_t)x21 * x51)) +ℤ x70);
+ { uint64_t x94 = (x86 >> 0x1e);
+ { uint32_t x95 = (x86 & 0x3fffffff);
+ { uint64_t x96 = (x71 >> 0x1e);
+ { uint32_t x97 = (x71 & 0x3fffffff);
+ { ℤ x98 = ((0x40000000 *ℤ x96) +ℤ x97);
+ { uint64_t x99 = (x98 >> 0x1e);
+ { uint32_t x100 = (x98 & 0x3fffffff);
+ { ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
+ { uint64_t x102 = (x101 >> 0x1e);
+ { uint32_t x103 = (x101 & 0x3fffffff);
+ { ℤ x104 = (x93 +ℤ x99);
+ { uint64_t x105 = (x104 >> 0x1e);
+ { uint32_t x106 = (x104 & 0x3fffffff);
+ { ℤ x107 = (x102 +ℤ x84);
+ { uint64_t x108 = (x107 >> 0x1e);
+ { uint32_t x109 = (x107 & 0x3fffffff);
+ { ℤ x110 = (x105 +ℤ x92);
+ { uint64_t x111 = (x110 >> 0x1e);
+ { uint32_t x112 = (x110 & 0x3fffffff);
+ { ℤ x113 = (x108 +ℤ x83);
+ { uint64_t x114 = (x113 >> 0x1e);
+ { uint32_t x115 = (x113 & 0x3fffffff);
+ { ℤ x116 = (x111 +ℤ x91);
+ { uint64_t x117 = (x116 >> 0x1e);
+ { uint32_t x118 = (x116 & 0x3fffffff);
+ { ℤ x119 = (x114 +ℤ x82);
+ { uint64_t x120 = (x119 >> 0x1e);
+ { uint32_t x121 = (x119 & 0x3fffffff);
+ { ℤ x122 = (x117 +ℤ x90);
+ { uint64_t x123 = (x122 >> 0x1e);
+ { uint32_t x124 = (x122 & 0x3fffffff);
+ { ℤ x125 = (x120 +ℤ x81);
+ { uint64_t x126 = (x125 >> 0x1e);
+ { uint32_t x127 = (x125 & 0x3fffffff);
+ { ℤ x128 = (x123 +ℤ x89);
+ { uint64_t x129 = (x128 >> 0x1e);
+ { uint32_t x130 = (x128 & 0x3fffffff);
+ { ℤ x131 = (x126 +ℤ x80);
+ { uint64_t x132 = (x131 >> 0x1e);
+ { uint32_t x133 = (x131 & 0x3fffffff);
+ { ℤ x134 = (x129 +ℤ x88);
+ { uint64_t x135 = (x134 >> 0x1e);
+ { uint32_t x136 = (x134 & 0x3fffffff);
+ { ℤ x137 = (x132 +ℤ x79);
+ { uint64_t x138 = (x137 >> 0x1e);
+ { uint32_t x139 = (x137 & 0x3fffffff);
+ { ℤ x140 = (x135 +ℤ x87);
+ { uint64_t x141 = (x140 >> 0x1e);
+ { uint32_t x142 = (x140 & 0x3fffffff);
+ { uint64_t x143 = (x138 + x100);
+ { uint32_t x144 = (uint32_t) (x143 >> 0x1e);
+ { uint32_t x145 = ((uint32_t)x143 & 0x3fffffff);
+ { uint64_t x146 = (x141 + x95);
+ { uint32_t x147 = (uint32_t) (x146 >> 0x1e);
+ { uint32_t x148 = ((uint32_t)x146 & 0x3fffffff);
+ { uint64_t x149 = (((uint64_t)0x40000000 * x144) + x145);
+ { uint32_t x150 = (uint32_t) (x149 >> 0x1e);
+ { uint32_t x151 = ((uint32_t)x149 & 0x3fffffff);
+ { uint32_t x152 = ((x147 + x103) + x150);
+ { uint32_t x153 = (x152 >> 0x1e);
+ { uint32_t x154 = (x152 & 0x3fffffff);
+ { uint32_t x155 = (x106 + x150);
+ { uint32_t x156 = (x155 >> 0x1e);
+ { uint32_t x157 = (x155 & 0x3fffffff);
+ out[0] = x157;
+ out[1] = (x156 + x112);
+ out[2] = x118;
+ out[3] = x124;
+ out[4] = x130;
+ out[5] = x136;
+ out[6] = x142;
+ out[7] = x148;
+ out[8] = x154;
+ out[9] = (x153 + x109);
+ out[10] = x115;
+ out[11] = x121;
+ out[12] = x127;
+ out[13] = x133;
+ out[14] = x139;
+ out[15] = x151;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e480m2e240m1/fesquare.c b/src/Specific/solinas32_2e480m2e240m1/fesquare.c
index 015bd9449..a8da3832f 100644
--- a/src/Specific/solinas32_2e480m2e240m1/fesquare.c
+++ b/src/Specific/solinas32_2e480m2e240m1/fesquare.c
@@ -1,131 +1,129 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ ℤ x31 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x16 + x29) *ℤ ((uint64_t)x16 + x29)) -ℤ ((uint64_t)x16 * x16)), ((((uint64_t)x16 * x29) +ℤ ((uint64_t)x29 * x16)) +ℤ ((uint64_t)x29 * x29)));
-{ ℤ x32 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x14 + x30) *ℤ ((uint64_t)x16 + x29)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x14 + x30))) -ℤ (((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14))), (((((uint64_t)x14 * x29) +ℤ ((uint64_t)x16 * x30)) +ℤ (((uint64_t)x30 * x16) +ℤ ((uint64_t)x29 * x14))) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))));
-{ ℤ x33 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x12 + x28) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x14 + x30)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x12 + x28)))) -ℤ (((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12)))), (((((uint64_t)x12 * x29) +ℤ (((uint64_t)x14 * x30) +ℤ ((uint64_t)x16 * x28))) +ℤ (((uint64_t)x28 * x16) +ℤ (((uint64_t)x30 * x14) +ℤ ((uint64_t)x29 * x12)))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))));
-{ ℤ x34 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x10 + x26) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x12 + x28)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x10 + x26))))) -ℤ (((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10))))), (((((uint64_t)x10 * x29) +ℤ (((uint64_t)x12 * x30) +ℤ (((uint64_t)x14 * x28) +ℤ ((uint64_t)x16 * x26)))) +ℤ (((uint64_t)x26 * x16) +ℤ (((uint64_t)x28 * x14) +ℤ (((uint64_t)x30 * x12) +ℤ ((uint64_t)x29 * x10))))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))));
-{ ℤ x35 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x8 + x24) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x10 + x26)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x8 + x24)))))) -ℤ (((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8)))))), (((((uint64_t)x8 * x29) +ℤ (((uint64_t)x10 * x30) +ℤ (((uint64_t)x12 * x28) +ℤ (((uint64_t)x14 * x26) +ℤ ((uint64_t)x16 * x24))))) +ℤ (((uint64_t)x24 * x16) +ℤ (((uint64_t)x26 * x14) +ℤ (((uint64_t)x28 * x12) +ℤ (((uint64_t)x30 * x10) +ℤ ((uint64_t)x29 * x8)))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))));
-{ ℤ x36 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x6 + x22) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x8 + x24)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) +ℤ (((uint64_t)x8 * x30) +ℤ (((uint64_t)x10 * x28) +ℤ (((uint64_t)x12 * x26) +ℤ (((uint64_t)x14 * x24) +ℤ ((uint64_t)x16 * x22)))))) +ℤ (((uint64_t)x22 * x16) +ℤ (((uint64_t)x24 * x14) +ℤ (((uint64_t)x26 * x12) +ℤ (((uint64_t)x28 * x10) +ℤ (((uint64_t)x30 * x8) +ℤ ((uint64_t)x29 * x6))))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))));
-{ ℤ x37 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x4 + x20) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x6 + x22)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) +ℤ (((uint64_t)x6 * x30) +ℤ (((uint64_t)x8 * x28) +ℤ (((uint64_t)x10 * x26) +ℤ (((uint64_t)x12 * x24) +ℤ (((uint64_t)x14 * x22) +ℤ ((uint64_t)x16 * x20))))))) +ℤ (((uint64_t)x20 * x16) +ℤ (((uint64_t)x22 * x14) +ℤ (((uint64_t)x24 * x12) +ℤ (((uint64_t)x26 * x10) +ℤ (((uint64_t)x28 * x8) +ℤ (((uint64_t)x30 * x6) +ℤ ((uint64_t)x29 * x4)))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))));
-{ ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) +ℤ (((uint64_t)x4 * x30) +ℤ (((uint64_t)x6 * x28) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ (((uint64_t)x12 * x22) +ℤ (((uint64_t)x14 * x20) +ℤ ((uint64_t)x16 * x18)))))))) +ℤ (((uint64_t)x18 * x16) +ℤ (((uint64_t)x20 * x14) +ℤ (((uint64_t)x22 * x12) +ℤ (((uint64_t)x24 * x10) +ℤ (((uint64_t)x26 * x8) +ℤ (((uint64_t)x28 * x6) +ℤ (((uint64_t)x30 * x4) +ℤ ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18))))))))));
-{ ℤ x39 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x14 + x30) *ℤ ((uint64_t)x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) +ℤ (((uint64_t)x4 * x28) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) +ℤ (((uint64_t)x10 * x22) +ℤ (((uint64_t)x12 * x20) +ℤ ((uint64_t)x14 * x18))))))) +ℤ (((uint64_t)x18 * x14) +ℤ (((uint64_t)x20 * x12) +ℤ (((uint64_t)x22 * x10) +ℤ (((uint64_t)x24 * x8) +ℤ (((uint64_t)x26 * x6) +ℤ (((uint64_t)x28 * x4) +ℤ ((uint64_t)x30 * x2)))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))));
-{ ℤ x40 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x12 + x28) *ℤ ((uint64_t)x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) +ℤ (((uint64_t)x4 * x26) +ℤ (((uint64_t)x6 * x24) +ℤ (((uint64_t)x8 * x22) +ℤ (((uint64_t)x10 * x20) +ℤ ((uint64_t)x12 * x18)))))) +ℤ (((uint64_t)x18 * x12) +ℤ (((uint64_t)x20 * x10) +ℤ (((uint64_t)x22 * x8) +ℤ (((uint64_t)x24 * x6) +ℤ (((uint64_t)x26 * x4) +ℤ ((uint64_t)x28 * x2))))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))));
-{ ℤ x41 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x10 + x26) *ℤ ((uint64_t)x2 + x18)))))) -ℤ (((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2)))))), (((((uint64_t)x2 * x26) +ℤ (((uint64_t)x4 * x24) +ℤ (((uint64_t)x6 * x22) +ℤ (((uint64_t)x8 * x20) +ℤ ((uint64_t)x10 * x18))))) +ℤ (((uint64_t)x18 * x10) +ℤ (((uint64_t)x20 * x8) +ℤ (((uint64_t)x22 * x6) +ℤ (((uint64_t)x24 * x4) +ℤ ((uint64_t)x26 * x2)))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))));
-{ ℤ x42 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x8 + x24) *ℤ ((uint64_t)x2 + x18))))) -ℤ (((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2))))), (((((uint64_t)x2 * x24) +ℤ (((uint64_t)x4 * x22) +ℤ (((uint64_t)x6 * x20) +ℤ ((uint64_t)x8 * x18)))) +ℤ (((uint64_t)x18 * x8) +ℤ (((uint64_t)x20 * x6) +ℤ (((uint64_t)x22 * x4) +ℤ ((uint64_t)x24 * x2))))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))));
-{ ℤ x43 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x6 + x22) *ℤ ((uint64_t)x2 + x18)))) -ℤ (((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2)))), (((((uint64_t)x2 * x22) +ℤ (((uint64_t)x4 * x20) +ℤ ((uint64_t)x6 * x18))) +ℤ (((uint64_t)x18 * x6) +ℤ (((uint64_t)x20 * x4) +ℤ ((uint64_t)x22 * x2)))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))));
-{ ℤ x44 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x4 + x20) *ℤ ((uint64_t)x2 + x18))) -ℤ (((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2))), (((((uint64_t)x2 * x20) +ℤ ((uint64_t)x4 * x18)) +ℤ (((uint64_t)x18 * x4) +ℤ ((uint64_t)x20 * x2))) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))));
-{ ℤ x45 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x2 + x18) *ℤ ((uint64_t)x2 + x18)) -ℤ ((uint64_t)x2 * x2)), ((((uint64_t)x2 * x18) +ℤ ((uint64_t)x18 * x2)) +ℤ ((uint64_t)x18 * x18)));
-{ ℤ x46 = (((((uint64_t)x16 * x16) +ℤ ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
-{ ℤ x47 = ((((((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14)) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))) +ℤ x40) +ℤ x32);
-{ ℤ x48 = ((((((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))) +ℤ x41) +ℤ x33);
-{ ℤ x49 = ((((((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10)))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))) +ℤ x42) +ℤ x34);
-{ ℤ x50 = ((((((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))) +ℤ x43) +ℤ x35);
-{ ℤ x51 = ((((((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6)))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))) +ℤ x44) +ℤ x36);
-{ ℤ x52 = ((((((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))) +ℤ x45) +ℤ x37);
-{ ℤ x53 = ((((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18)))))))));
-{ ℤ x54 = (((((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))) +ℤ x31);
-{ ℤ x55 = (((((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2)))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))) +ℤ x32);
-{ ℤ x56 = (((((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))) +ℤ x33);
-{ ℤ x57 = (((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))) +ℤ x34);
-{ ℤ x58 = (((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))) +ℤ x35);
-{ ℤ x59 = (((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))) +ℤ x36);
-{ ℤ x60 = ((((uint64_t)x2 * x2) +ℤ ((uint64_t)x18 * x18)) +ℤ x37);
-{ uint64_t x61 = (x53 >> 0x1e);
-{ uint32_t x62 = (x53 & 0x3fffffff);
-{ uint64_t x63 = (x38 >> 0x1e);
-{ uint32_t x64 = (x38 & 0x3fffffff);
-{ ℤ x65 = ((0x40000000 *ℤ x63) +ℤ x64);
-{ uint64_t x66 = (x65 >> 0x1e);
-{ uint32_t x67 = (x65 & 0x3fffffff);
-{ ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
-{ uint64_t x69 = (x68 >> 0x1e);
-{ uint32_t x70 = (x68 & 0x3fffffff);
-{ ℤ x71 = (x60 +ℤ x66);
-{ uint64_t x72 = (x71 >> 0x1e);
-{ uint32_t x73 = (x71 & 0x3fffffff);
-{ ℤ x74 = (x69 +ℤ x51);
-{ uint64_t x75 = (x74 >> 0x1e);
-{ uint32_t x76 = (x74 & 0x3fffffff);
-{ ℤ x77 = (x72 +ℤ x59);
-{ uint64_t x78 = (x77 >> 0x1e);
-{ uint32_t x79 = (x77 & 0x3fffffff);
-{ ℤ x80 = (x75 +ℤ x50);
-{ uint64_t x81 = (x80 >> 0x1e);
-{ uint32_t x82 = (x80 & 0x3fffffff);
-{ ℤ x83 = (x78 +ℤ x58);
-{ uint64_t x84 = (x83 >> 0x1e);
-{ uint32_t x85 = (x83 & 0x3fffffff);
-{ ℤ x86 = (x81 +ℤ x49);
-{ uint64_t x87 = (x86 >> 0x1e);
-{ uint32_t x88 = (x86 & 0x3fffffff);
-{ ℤ x89 = (x84 +ℤ x57);
-{ uint64_t x90 = (x89 >> 0x1e);
-{ uint32_t x91 = (x89 & 0x3fffffff);
-{ ℤ x92 = (x87 +ℤ x48);
-{ uint64_t x93 = (x92 >> 0x1e);
-{ uint32_t x94 = (x92 & 0x3fffffff);
-{ ℤ x95 = (x90 +ℤ x56);
-{ uint64_t x96 = (x95 >> 0x1e);
-{ uint32_t x97 = (x95 & 0x3fffffff);
-{ ℤ x98 = (x93 +ℤ x47);
-{ uint64_t x99 = (x98 >> 0x1e);
-{ uint32_t x100 = (x98 & 0x3fffffff);
-{ ℤ x101 = (x96 +ℤ x55);
-{ uint64_t x102 = (x101 >> 0x1e);
-{ uint32_t x103 = (x101 & 0x3fffffff);
-{ ℤ x104 = (x99 +ℤ x46);
-{ uint64_t x105 = (x104 >> 0x1e);
-{ uint32_t x106 = (x104 & 0x3fffffff);
-{ ℤ x107 = (x102 +ℤ x54);
-{ uint64_t x108 = (x107 >> 0x1e);
-{ uint32_t x109 = (x107 & 0x3fffffff);
-{ uint64_t x110 = (x105 + x67);
-{ uint32_t x111 = (uint32_t) (x110 >> 0x1e);
-{ uint32_t x112 = ((uint32_t)x110 & 0x3fffffff);
-{ uint64_t x113 = (x108 + x62);
-{ uint32_t x114 = (uint32_t) (x113 >> 0x1e);
-{ uint32_t x115 = ((uint32_t)x113 & 0x3fffffff);
-{ uint64_t x116 = (((uint64_t)0x40000000 * x111) + x112);
-{ uint32_t x117 = (uint32_t) (x116 >> 0x1e);
-{ uint32_t x118 = ((uint32_t)x116 & 0x3fffffff);
-{ uint32_t x119 = ((x114 + x70) + x117);
-{ uint32_t x120 = (x119 >> 0x1e);
-{ uint32_t x121 = (x119 & 0x3fffffff);
-{ uint32_t x122 = (x73 + x117);
-{ uint32_t x123 = (x122 >> 0x1e);
-{ uint32_t x124 = (x122 & 0x3fffffff);
-out[0] = x118;
-out[1] = x106;
-out[2] = x100;
-out[3] = x94;
-out[4] = x88;
-out[5] = x82;
-out[6] = x120 + x76;
-out[7] = x121;
-out[8] = x115;
-out[9] = x109;
-out[10] = x103;
-out[11] = x97;
-out[12] = x91;
-out[13] = x85;
-out[14] = x123 + x79;
-out[15] = x124;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { ℤ x31 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x16 + x29) *ℤ ((uint64_t)x16 + x29)) -ℤ ((uint64_t)x16 * x16)), ((((uint64_t)x16 * x29) +ℤ ((uint64_t)x29 * x16)) +ℤ ((uint64_t)x29 * x29)));
+ { ℤ x32 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x14 + x30) *ℤ ((uint64_t)x16 + x29)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x14 + x30))) -ℤ (((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14))), (((((uint64_t)x14 * x29) +ℤ ((uint64_t)x16 * x30)) +ℤ (((uint64_t)x30 * x16) +ℤ ((uint64_t)x29 * x14))) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))));
+ { ℤ x33 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x12 + x28) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x14 + x30)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x12 + x28)))) -ℤ (((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12)))), (((((uint64_t)x12 * x29) +ℤ (((uint64_t)x14 * x30) +ℤ ((uint64_t)x16 * x28))) +ℤ (((uint64_t)x28 * x16) +ℤ (((uint64_t)x30 * x14) +ℤ ((uint64_t)x29 * x12)))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))));
+ { ℤ x34 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x10 + x26) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x12 + x28)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x10 + x26))))) -ℤ (((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10))))), (((((uint64_t)x10 * x29) +ℤ (((uint64_t)x12 * x30) +ℤ (((uint64_t)x14 * x28) +ℤ ((uint64_t)x16 * x26)))) +ℤ (((uint64_t)x26 * x16) +ℤ (((uint64_t)x28 * x14) +ℤ (((uint64_t)x30 * x12) +ℤ ((uint64_t)x29 * x10))))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))));
+ { ℤ x35 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x8 + x24) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x10 + x26)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x8 + x24)))))) -ℤ (((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8)))))), (((((uint64_t)x8 * x29) +ℤ (((uint64_t)x10 * x30) +ℤ (((uint64_t)x12 * x28) +ℤ (((uint64_t)x14 * x26) +ℤ ((uint64_t)x16 * x24))))) +ℤ (((uint64_t)x24 * x16) +ℤ (((uint64_t)x26 * x14) +ℤ (((uint64_t)x28 * x12) +ℤ (((uint64_t)x30 * x10) +ℤ ((uint64_t)x29 * x8)))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))));
+ { ℤ x36 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x6 + x22) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x8 + x24)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) +ℤ (((uint64_t)x8 * x30) +ℤ (((uint64_t)x10 * x28) +ℤ (((uint64_t)x12 * x26) +ℤ (((uint64_t)x14 * x24) +ℤ ((uint64_t)x16 * x22)))))) +ℤ (((uint64_t)x22 * x16) +ℤ (((uint64_t)x24 * x14) +ℤ (((uint64_t)x26 * x12) +ℤ (((uint64_t)x28 * x10) +ℤ (((uint64_t)x30 * x8) +ℤ ((uint64_t)x29 * x6))))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))));
+ { ℤ x37 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x4 + x20) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x6 + x22)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) +ℤ (((uint64_t)x6 * x30) +ℤ (((uint64_t)x8 * x28) +ℤ (((uint64_t)x10 * x26) +ℤ (((uint64_t)x12 * x24) +ℤ (((uint64_t)x14 * x22) +ℤ ((uint64_t)x16 * x20))))))) +ℤ (((uint64_t)x20 * x16) +ℤ (((uint64_t)x22 * x14) +ℤ (((uint64_t)x24 * x12) +ℤ (((uint64_t)x26 * x10) +ℤ (((uint64_t)x28 * x8) +ℤ (((uint64_t)x30 * x6) +ℤ ((uint64_t)x29 * x4)))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))));
+ { ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) +ℤ (((uint64_t)x4 * x30) +ℤ (((uint64_t)x6 * x28) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ (((uint64_t)x12 * x22) +ℤ (((uint64_t)x14 * x20) +ℤ ((uint64_t)x16 * x18)))))))) +ℤ (((uint64_t)x18 * x16) +ℤ (((uint64_t)x20 * x14) +ℤ (((uint64_t)x22 * x12) +ℤ (((uint64_t)x24 * x10) +ℤ (((uint64_t)x26 * x8) +ℤ (((uint64_t)x28 * x6) +ℤ (((uint64_t)x30 * x4) +ℤ ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18))))))))));
+ { ℤ x39 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x14 + x30) *ℤ ((uint64_t)x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) +ℤ (((uint64_t)x4 * x28) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) +ℤ (((uint64_t)x10 * x22) +ℤ (((uint64_t)x12 * x20) +ℤ ((uint64_t)x14 * x18))))))) +ℤ (((uint64_t)x18 * x14) +ℤ (((uint64_t)x20 * x12) +ℤ (((uint64_t)x22 * x10) +ℤ (((uint64_t)x24 * x8) +ℤ (((uint64_t)x26 * x6) +ℤ (((uint64_t)x28 * x4) +ℤ ((uint64_t)x30 * x2)))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))));
+ { ℤ x40 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x12 + x28) *ℤ ((uint64_t)x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) +ℤ (((uint64_t)x4 * x26) +ℤ (((uint64_t)x6 * x24) +ℤ (((uint64_t)x8 * x22) +ℤ (((uint64_t)x10 * x20) +ℤ ((uint64_t)x12 * x18)))))) +ℤ (((uint64_t)x18 * x12) +ℤ (((uint64_t)x20 * x10) +ℤ (((uint64_t)x22 * x8) +ℤ (((uint64_t)x24 * x6) +ℤ (((uint64_t)x26 * x4) +ℤ ((uint64_t)x28 * x2))))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))));
+ { ℤ x41 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x10 + x26) *ℤ ((uint64_t)x2 + x18)))))) -ℤ (((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2)))))), (((((uint64_t)x2 * x26) +ℤ (((uint64_t)x4 * x24) +ℤ (((uint64_t)x6 * x22) +ℤ (((uint64_t)x8 * x20) +ℤ ((uint64_t)x10 * x18))))) +ℤ (((uint64_t)x18 * x10) +ℤ (((uint64_t)x20 * x8) +ℤ (((uint64_t)x22 * x6) +ℤ (((uint64_t)x24 * x4) +ℤ ((uint64_t)x26 * x2)))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))));
+ { ℤ x42 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x8 + x24) *ℤ ((uint64_t)x2 + x18))))) -ℤ (((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2))))), (((((uint64_t)x2 * x24) +ℤ (((uint64_t)x4 * x22) +ℤ (((uint64_t)x6 * x20) +ℤ ((uint64_t)x8 * x18)))) +ℤ (((uint64_t)x18 * x8) +ℤ (((uint64_t)x20 * x6) +ℤ (((uint64_t)x22 * x4) +ℤ ((uint64_t)x24 * x2))))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))));
+ { ℤ x43 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x6 + x22) *ℤ ((uint64_t)x2 + x18)))) -ℤ (((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2)))), (((((uint64_t)x2 * x22) +ℤ (((uint64_t)x4 * x20) +ℤ ((uint64_t)x6 * x18))) +ℤ (((uint64_t)x18 * x6) +ℤ (((uint64_t)x20 * x4) +ℤ ((uint64_t)x22 * x2)))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))));
+ { ℤ x44 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x4 + x20) *ℤ ((uint64_t)x2 + x18))) -ℤ (((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2))), (((((uint64_t)x2 * x20) +ℤ ((uint64_t)x4 * x18)) +ℤ (((uint64_t)x18 * x4) +ℤ ((uint64_t)x20 * x2))) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))));
+ { ℤ x45 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x2 + x18) *ℤ ((uint64_t)x2 + x18)) -ℤ ((uint64_t)x2 * x2)), ((((uint64_t)x2 * x18) +ℤ ((uint64_t)x18 * x2)) +ℤ ((uint64_t)x18 * x18)));
+ { ℤ x46 = (((((uint64_t)x16 * x16) +ℤ ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
+ { ℤ x47 = ((((((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14)) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))) +ℤ x40) +ℤ x32);
+ { ℤ x48 = ((((((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))) +ℤ x41) +ℤ x33);
+ { ℤ x49 = ((((((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10)))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))) +ℤ x42) +ℤ x34);
+ { ℤ x50 = ((((((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))) +ℤ x43) +ℤ x35);
+ { ℤ x51 = ((((((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6)))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))) +ℤ x44) +ℤ x36);
+ { ℤ x52 = ((((((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))) +ℤ x45) +ℤ x37);
+ { ℤ x53 = ((((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18)))))))));
+ { ℤ x54 = (((((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))) +ℤ x31);
+ { ℤ x55 = (((((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2)))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))) +ℤ x32);
+ { ℤ x56 = (((((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))) +ℤ x33);
+ { ℤ x57 = (((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))) +ℤ x34);
+ { ℤ x58 = (((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))) +ℤ x35);
+ { ℤ x59 = (((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))) +ℤ x36);
+ { ℤ x60 = ((((uint64_t)x2 * x2) +ℤ ((uint64_t)x18 * x18)) +ℤ x37);
+ { uint64_t x61 = (x53 >> 0x1e);
+ { uint32_t x62 = (x53 & 0x3fffffff);
+ { uint64_t x63 = (x38 >> 0x1e);
+ { uint32_t x64 = (x38 & 0x3fffffff);
+ { ℤ x65 = ((0x40000000 *ℤ x63) +ℤ x64);
+ { uint64_t x66 = (x65 >> 0x1e);
+ { uint32_t x67 = (x65 & 0x3fffffff);
+ { ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
+ { uint64_t x69 = (x68 >> 0x1e);
+ { uint32_t x70 = (x68 & 0x3fffffff);
+ { ℤ x71 = (x60 +ℤ x66);
+ { uint64_t x72 = (x71 >> 0x1e);
+ { uint32_t x73 = (x71 & 0x3fffffff);
+ { ℤ x74 = (x69 +ℤ x51);
+ { uint64_t x75 = (x74 >> 0x1e);
+ { uint32_t x76 = (x74 & 0x3fffffff);
+ { ℤ x77 = (x72 +ℤ x59);
+ { uint64_t x78 = (x77 >> 0x1e);
+ { uint32_t x79 = (x77 & 0x3fffffff);
+ { ℤ x80 = (x75 +ℤ x50);
+ { uint64_t x81 = (x80 >> 0x1e);
+ { uint32_t x82 = (x80 & 0x3fffffff);
+ { ℤ x83 = (x78 +ℤ x58);
+ { uint64_t x84 = (x83 >> 0x1e);
+ { uint32_t x85 = (x83 & 0x3fffffff);
+ { ℤ x86 = (x81 +ℤ x49);
+ { uint64_t x87 = (x86 >> 0x1e);
+ { uint32_t x88 = (x86 & 0x3fffffff);
+ { ℤ x89 = (x84 +ℤ x57);
+ { uint64_t x90 = (x89 >> 0x1e);
+ { uint32_t x91 = (x89 & 0x3fffffff);
+ { ℤ x92 = (x87 +ℤ x48);
+ { uint64_t x93 = (x92 >> 0x1e);
+ { uint32_t x94 = (x92 & 0x3fffffff);
+ { ℤ x95 = (x90 +ℤ x56);
+ { uint64_t x96 = (x95 >> 0x1e);
+ { uint32_t x97 = (x95 & 0x3fffffff);
+ { ℤ x98 = (x93 +ℤ x47);
+ { uint64_t x99 = (x98 >> 0x1e);
+ { uint32_t x100 = (x98 & 0x3fffffff);
+ { ℤ x101 = (x96 +ℤ x55);
+ { uint64_t x102 = (x101 >> 0x1e);
+ { uint32_t x103 = (x101 & 0x3fffffff);
+ { ℤ x104 = (x99 +ℤ x46);
+ { uint64_t x105 = (x104 >> 0x1e);
+ { uint32_t x106 = (x104 & 0x3fffffff);
+ { ℤ x107 = (x102 +ℤ x54);
+ { uint64_t x108 = (x107 >> 0x1e);
+ { uint32_t x109 = (x107 & 0x3fffffff);
+ { uint64_t x110 = (x105 + x67);
+ { uint32_t x111 = (uint32_t) (x110 >> 0x1e);
+ { uint32_t x112 = ((uint32_t)x110 & 0x3fffffff);
+ { uint64_t x113 = (x108 + x62);
+ { uint32_t x114 = (uint32_t) (x113 >> 0x1e);
+ { uint32_t x115 = ((uint32_t)x113 & 0x3fffffff);
+ { uint64_t x116 = (((uint64_t)0x40000000 * x111) + x112);
+ { uint32_t x117 = (uint32_t) (x116 >> 0x1e);
+ { uint32_t x118 = ((uint32_t)x116 & 0x3fffffff);
+ { uint32_t x119 = ((x114 + x70) + x117);
+ { uint32_t x120 = (x119 >> 0x1e);
+ { uint32_t x121 = (x119 & 0x3fffffff);
+ { uint32_t x122 = (x73 + x117);
+ { uint32_t x123 = (x122 >> 0x1e);
+ { uint32_t x124 = (x122 & 0x3fffffff);
+ out[0] = x124;
+ out[1] = (x123 + x79);
+ out[2] = x85;
+ out[3] = x91;
+ out[4] = x97;
+ out[5] = x103;
+ out[6] = x109;
+ out[7] = x115;
+ out[8] = x121;
+ out[9] = (x120 + x76);
+ out[10] = x82;
+ out[11] = x88;
+ out[12] = x94;
+ out[13] = x100;
+ out[14] = x106;
+ out[15] = x118;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e480m2e240m1/freeze.c b/src/Specific/solinas32_2e480m2e240m1/freeze.c
index 6d0580ee4..8d6e8e351 100644
--- a/src/Specific/solinas32_2e480m2e240m1/freeze.c
+++ b/src/Specific/solinas32_2e480m2e240m1/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint32_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 30 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffff;;
+static void freeze(uint32_t out[16], const uint32_t in1[16]) {
+ { const uint32_t x29 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffff);
+ { uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0x3fffffff);
+ { uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0x3fffffff);
+ { uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0x3fffffff);
+ { uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0x3fffffff);
+ { uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0x3fffffff);
+ { uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0x3fffffff);
+ { uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0x3fffffff);
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0x3ffffffe);
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0x3fffffff);
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0x3fffffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0x3fffffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0x3fffffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0x3fffffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x3fffffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x3fffffff);
+ { uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ { uint32_t x80 = (x79 & 0x3fffffff);
+ { uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint32_t x84 = (x79 & 0x3fffffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint32_t x88 = (x79 & 0x3fffffff);
+ { uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint32_t x92 = (x79 & 0x3fffffff);
+ { uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint32_t x96 = (x79 & 0x3fffffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint32_t x100 = (x79 & 0x3fffffff);
+ { uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint32_t x104 = (x79 & 0x3fffffff);
+ { uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint32_t x108 = (x79 & 0x3fffffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint32_t x112 = (x79 & 0x3ffffffe);
+ { uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint32_t x116 = (x79 & 0x3fffffff);
+ { uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint32_t x120 = (x79 & 0x3fffffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint32_t x124 = (x79 & 0x3fffffff);
+ { uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint32_t x128 = (x79 & 0x3fffffff);
+ { uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint32_t x132 = (x79 & 0x3fffffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint32_t x136 = (x79 & 0x3fffffff);
+ { uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint32_t x140 = (x79 & 0x3fffffff);
+ { uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e511m187/freeze.c b/src/Specific/solinas32_2e511m187/freeze.c
new file mode 100644
index 000000000..d78a4ec72
--- /dev/null
+++ b/src/Specific/solinas32_2e511m187/freeze.c
@@ -0,0 +1,144 @@
+static void freeze(uint32_t out[28], const uint32_t in1[28]) { // Subtracts a fixed per-limb constant from the 28 limbs of in1 with a borrow chain, then adds the constant back under a mask derived from the final borrow; the constants look like the limbs of 2^511 - 187 (per the directory name) — confirm
+ { const uint32_t x53 = in1[27]; // inputs are loaded from index 27 down to index 0
+ { const uint32_t x54 = in1[26];
+ { const uint32_t x52 = in1[25];
+ { const uint32_t x50 = in1[24];
+ { const uint32_t x48 = in1[23];
+ { const uint32_t x46 = in1[22];
+ { const uint32_t x44 = in1[21];
+ { const uint32_t x42 = in1[20];
+ { const uint32_t x40 = in1[19];
+ { const uint32_t x38 = in1[18];
+ { const uint32_t x36 = in1[17];
+ { const uint32_t x34 = in1[16];
+ { const uint32_t x32 = in1[15];
+ { const uint32_t x30 = in1[14];
+ { const uint32_t x28 = in1[13];
+ { const uint32_t x26 = in1[12];
+ { const uint32_t x24 = in1[11];
+ { const uint32_t x22 = in1[10];
+ { const uint32_t x20 = in1[9];
+ { const uint32_t x18 = in1[8];
+ { const uint32_t x16 = in1[7];
+ { const uint32_t x14 = in1[6];
+ { const uint32_t x12 = in1[5];
+ { const uint32_t x10 = in1[4];
+ { const uint32_t x8 = in1[3];
+ { const uint32_t x6 = in1[2];
+ { const uint32_t x4 = in1[1];
+ { const uint32_t x2 = in1[0];
+ { uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ff45); // NOTE(review): 'Op (Syntax...)' is not C syntax — looks like un-lowered generator output; presumably (borrow_in, minuend, subtrahend) -> (difference, borrow_out) at the stated bit width — confirm
+ { uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x4, 0x3ffff); // each later step takes the previous step's second result as its first argument
+ { uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x6, 0x3ffff);
+ { uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x8, 0x3ffff);
+ { uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x10, 0x7ffff);
+ { uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x12, 0x3ffff);
+ { uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x14, 0x3ffff);
+ { uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x16, 0x3ffff);
+ { uint32_t x80, uint8_t x81 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x78, Return x18, 0x7ffff);
+ { uint32_t x83, uint8_t x84 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x81, Return x20, 0x3ffff);
+ { uint32_t x86, uint8_t x87 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x84, Return x22, 0x3ffff);
+ { uint32_t x89, uint8_t x90 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x24, 0x3ffff);
+ { uint32_t x92, uint8_t x93 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x26, 0x7ffff);
+ { uint32_t x95, uint8_t x96 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x93, Return x28, 0x3ffff);
+ { uint32_t x98, uint8_t x99 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x96, Return x30, 0x3ffff);
+ { uint32_t x101, uint8_t x102 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x32, 0x3ffff);
+ { uint32_t x104, uint8_t x105 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x102, Return x34, 0x7ffff);
+ { uint32_t x107, uint8_t x108 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x105, Return x36, 0x3ffff);
+ { uint32_t x110, uint8_t x111 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x108, Return x38, 0x3ffff);
+ { uint32_t x113, uint8_t x114 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x40, 0x3ffff);
+ { uint32_t x116, uint8_t x117 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x114, Return x42, 0x7ffff);
+ { uint32_t x119, uint8_t x120 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x117, Return x44, 0x3ffff);
+ { uint32_t x122, uint8_t x123 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x120, Return x46, 0x3ffff);
+ { uint32_t x125, uint8_t x126 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x48, 0x3ffff);
+ { uint32_t x128, uint8_t x129 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x126, Return x50, 0x7ffff);
+ { uint32_t x131, uint8_t x132 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x129, Return x52, 0x3ffff);
+ { uint32_t x134, uint8_t x135 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x132, Return x54, 0x3ffff);
+ { uint32_t x137, uint8_t x138 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x53, 0x3ffff); // x138 is the last borrow-style output of the chain
+ { uint32_t x139 = (uint32_t)cmovznz(x138, 0x0, 0xffffffff); // mask: presumably 0 when x138 is zero and all-ones otherwise — confirm cmovznz semantics (defined outside this file)
+ { uint32_t x140 = (x139 & 0x7ff45); // same constant as the matching subtraction step, kept or zeroed by the mask
+ { uint32_t x142, uint8_t x143 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x56, Return x140); // add-back chain: each step takes the previous step's second result, the earlier difference, and the masked constant
+ { uint32_t x144 = (x139 & 0x3ffff);
+ { uint32_t x146, uint8_t x147 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x143, Return x59, Return x144);
+ { uint32_t x148 = (x139 & 0x3ffff);
+ { uint32_t x150, uint8_t x151 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x147, Return x62, Return x148);
+ { uint32_t x152 = (x139 & 0x3ffff);
+ { uint32_t x154, uint8_t x155 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x151, Return x65, Return x152);
+ { uint32_t x156 = (x139 & 0x7ffff);
+ { uint32_t x158, uint8_t x159 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x155, Return x68, Return x156);
+ { uint32_t x160 = (x139 & 0x3ffff);
+ { uint32_t x162, uint8_t x163 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x159, Return x71, Return x160);
+ { uint32_t x164 = (x139 & 0x3ffff);
+ { uint32_t x166, uint8_t x167 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x163, Return x74, Return x164);
+ { uint32_t x168 = (x139 & 0x3ffff);
+ { uint32_t x170, uint8_t x171 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x167, Return x77, Return x168);
+ { uint32_t x172 = (x139 & 0x7ffff);
+ { uint32_t x174, uint8_t x175 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x171, Return x80, Return x172);
+ { uint32_t x176 = (x139 & 0x3ffff);
+ { uint32_t x178, uint8_t x179 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x175, Return x83, Return x176);
+ { uint32_t x180 = (x139 & 0x3ffff);
+ { uint32_t x182, uint8_t x183 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x179, Return x86, Return x180);
+ { uint32_t x184 = (x139 & 0x3ffff);
+ { uint32_t x186, uint8_t x187 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x183, Return x89, Return x184);
+ { uint32_t x188 = (x139 & 0x7ffff);
+ { uint32_t x190, uint8_t x191 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x187, Return x92, Return x188);
+ { uint32_t x192 = (x139 & 0x3ffff);
+ { uint32_t x194, uint8_t x195 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x191, Return x95, Return x192);
+ { uint32_t x196 = (x139 & 0x3ffff);
+ { uint32_t x198, uint8_t x199 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x195, Return x98, Return x196);
+ { uint32_t x200 = (x139 & 0x3ffff);
+ { uint32_t x202, uint8_t x203 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x199, Return x101, Return x200);
+ { uint32_t x204 = (x139 & 0x7ffff);
+ { uint32_t x206, uint8_t x207 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x203, Return x104, Return x204);
+ { uint32_t x208 = (x139 & 0x3ffff);
+ { uint32_t x210, uint8_t x211 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x207, Return x107, Return x208);
+ { uint32_t x212 = (x139 & 0x3ffff);
+ { uint32_t x214, uint8_t x215 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x211, Return x110, Return x212);
+ { uint32_t x216 = (x139 & 0x3ffff);
+ { uint32_t x218, uint8_t x219 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x215, Return x113, Return x216);
+ { uint32_t x220 = (x139 & 0x7ffff);
+ { uint32_t x222, uint8_t x223 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x219, Return x116, Return x220);
+ { uint32_t x224 = (x139 & 0x3ffff);
+ { uint32_t x226, uint8_t x227 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x223, Return x119, Return x224);
+ { uint32_t x228 = (x139 & 0x3ffff);
+ { uint32_t x230, uint8_t x231 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x227, Return x122, Return x228);
+ { uint32_t x232 = (x139 & 0x3ffff);
+ { uint32_t x234, uint8_t x235 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x231, Return x125, Return x232);
+ { uint32_t x236 = (x139 & 0x7ffff);
+ { uint32_t x238, uint8_t x239 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x235, Return x128, Return x236);
+ { uint32_t x240 = (x139 & 0x3ffff);
+ { uint32_t x242, uint8_t x243 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x239, Return x131, Return x240);
+ { uint32_t x244 = (x139 & 0x3ffff);
+ { uint32_t x246, uint8_t x247 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x243, Return x134, Return x244);
+ { uint32_t x248 = (x139 & 0x3ffff);
+ { uint32_t x250, uint8_t _ = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x247, Return x137, Return x248); // the second result of the last step is bound to '_' and never used
+ out[0] = x142; // out[i] receives the first result of the i-th add-back step
+ out[1] = x146;
+ out[2] = x150;
+ out[3] = x154;
+ out[4] = x158;
+ out[5] = x162;
+ out[6] = x166;
+ out[7] = x170;
+ out[8] = x174;
+ out[9] = x178;
+ out[10] = x182;
+ out[11] = x186;
+ out[12] = x190;
+ out[13] = x194;
+ out[14] = x198;
+ out[15] = x202;
+ out[16] = x206;
+ out[17] = x210;
+ out[18] = x214;
+ out[19] = x218;
+ out[20] = x222;
+ out[21] = x226;
+ out[22] = x230;
+ out[23] = x234;
+ out[24] = x238;
+ out[25] = x242;
+ out[26] = x246;
+ out[27] = x250;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas32_2e511m187/freezeDisplay.log b/src/Specific/solinas32_2e511m187/freezeDisplay.log
index 785a4fcf3..d4f86744c 100644
--- a/src/Specific/solinas32_2e511m187/freezeDisplay.log
+++ b/src/Specific/solinas32_2e511m187/freezeDisplay.log
@@ -2,7 +2,7 @@
Interp-η
(λ var : Syntax.base_type → Type,
λ '(x53, x54, x52, x50, x48, x46, x44, x42, x40, x38, x36, x34, x32, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
- uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, Const 524101);
+ uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x7ff45);
uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x4, 0x3ffff);
uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x6, 0x3ffff);
uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x8, 0x3ffff);
@@ -31,61 +31,61 @@ Interp-η
uint32_t x134, uint8_t x135 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x132, Return x54, 0x3ffff);
uint32_t x137, uint8_t x138 = Op (Syntax.SubWithGetBorrow 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x53, 0x3ffff);
uint32_t x139 = (uint32_t)cmovznz(x138, 0x0, 0xffffffff);
- uint32_t x140 = x139 & Const 524101;
+ uint32_t x140 = (x139 & 0x7ff45);
uint32_t x142, uint8_t x143 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x56, Return x140);
- uint32_t x144 = x139 & 0x3ffff;
+ uint32_t x144 = (x139 & 0x3ffff);
uint32_t x146, uint8_t x147 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x143, Return x59, Return x144);
- uint32_t x148 = x139 & 0x3ffff;
+ uint32_t x148 = (x139 & 0x3ffff);
uint32_t x150, uint8_t x151 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x147, Return x62, Return x148);
- uint32_t x152 = x139 & 0x3ffff;
+ uint32_t x152 = (x139 & 0x3ffff);
uint32_t x154, uint8_t x155 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x151, Return x65, Return x152);
- uint32_t x156 = x139 & 0x7ffff;
+ uint32_t x156 = (x139 & 0x7ffff);
uint32_t x158, uint8_t x159 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x155, Return x68, Return x156);
- uint32_t x160 = x139 & 0x3ffff;
+ uint32_t x160 = (x139 & 0x3ffff);
uint32_t x162, uint8_t x163 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x159, Return x71, Return x160);
- uint32_t x164 = x139 & 0x3ffff;
+ uint32_t x164 = (x139 & 0x3ffff);
uint32_t x166, uint8_t x167 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x163, Return x74, Return x164);
- uint32_t x168 = x139 & 0x3ffff;
+ uint32_t x168 = (x139 & 0x3ffff);
uint32_t x170, uint8_t x171 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x167, Return x77, Return x168);
- uint32_t x172 = x139 & 0x7ffff;
+ uint32_t x172 = (x139 & 0x7ffff);
uint32_t x174, uint8_t x175 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x171, Return x80, Return x172);
- uint32_t x176 = x139 & 0x3ffff;
+ uint32_t x176 = (x139 & 0x3ffff);
uint32_t x178, uint8_t x179 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x175, Return x83, Return x176);
- uint32_t x180 = x139 & 0x3ffff;
+ uint32_t x180 = (x139 & 0x3ffff);
uint32_t x182, uint8_t x183 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x179, Return x86, Return x180);
- uint32_t x184 = x139 & 0x3ffff;
+ uint32_t x184 = (x139 & 0x3ffff);
uint32_t x186, uint8_t x187 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x183, Return x89, Return x184);
- uint32_t x188 = x139 & 0x7ffff;
+ uint32_t x188 = (x139 & 0x7ffff);
uint32_t x190, uint8_t x191 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x187, Return x92, Return x188);
- uint32_t x192 = x139 & 0x3ffff;
+ uint32_t x192 = (x139 & 0x3ffff);
uint32_t x194, uint8_t x195 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x191, Return x95, Return x192);
- uint32_t x196 = x139 & 0x3ffff;
+ uint32_t x196 = (x139 & 0x3ffff);
uint32_t x198, uint8_t x199 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x195, Return x98, Return x196);
- uint32_t x200 = x139 & 0x3ffff;
+ uint32_t x200 = (x139 & 0x3ffff);
uint32_t x202, uint8_t x203 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x199, Return x101, Return x200);
- uint32_t x204 = x139 & 0x7ffff;
+ uint32_t x204 = (x139 & 0x7ffff);
uint32_t x206, uint8_t x207 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x203, Return x104, Return x204);
- uint32_t x208 = x139 & 0x3ffff;
+ uint32_t x208 = (x139 & 0x3ffff);
uint32_t x210, uint8_t x211 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x207, Return x107, Return x208);
- uint32_t x212 = x139 & 0x3ffff;
+ uint32_t x212 = (x139 & 0x3ffff);
uint32_t x214, uint8_t x215 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x211, Return x110, Return x212);
- uint32_t x216 = x139 & 0x3ffff;
+ uint32_t x216 = (x139 & 0x3ffff);
uint32_t x218, uint8_t x219 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x215, Return x113, Return x216);
- uint32_t x220 = x139 & 0x7ffff;
+ uint32_t x220 = (x139 & 0x7ffff);
uint32_t x222, uint8_t x223 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x219, Return x116, Return x220);
- uint32_t x224 = x139 & 0x3ffff;
+ uint32_t x224 = (x139 & 0x3ffff);
uint32_t x226, uint8_t x227 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x223, Return x119, Return x224);
- uint32_t x228 = x139 & 0x3ffff;
+ uint32_t x228 = (x139 & 0x3ffff);
uint32_t x230, uint8_t x231 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x227, Return x122, Return x228);
- uint32_t x232 = x139 & 0x3ffff;
+ uint32_t x232 = (x139 & 0x3ffff);
uint32_t x234, uint8_t x235 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x231, Return x125, Return x232);
- uint32_t x236 = x139 & 0x7ffff;
+ uint32_t x236 = (x139 & 0x7ffff);
uint32_t x238, uint8_t x239 = Op (Syntax.AddWithGetCarry 19 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x235, Return x128, Return x236);
- uint32_t x240 = x139 & 0x3ffff;
+ uint32_t x240 = (x139 & 0x3ffff);
uint32_t x242, uint8_t x243 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x239, Return x131, Return x240);
- uint32_t x244 = x139 & 0x3ffff;
+ uint32_t x244 = (x139 & 0x3ffff);
uint32_t x246, uint8_t x247 = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x243, Return x134, Return x244);
- uint32_t x248 = x139 & 0x3ffff;
+ uint32_t x248 = (x139 & 0x3ffff);
uint32_t x250, uint8_t _ = Op (Syntax.AddWithGetCarry 18 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x247, Return x137, Return x248);
(Return x250, Return x246, Return x242, Return x238, Return x234, Return x230, Return x226, Return x222, Return x218, Return x214, Return x210, Return x206, Return x202, Return x198, Return x194, Return x190, Return x186, Return x182, Return x178, Return x174, Return x170, Return x166, Return x162, Return x158, Return x154, Return x150, Return x146, Return x142))
x
diff --git a/src/Specific/solinas64_2e127m1/femul.c b/src/Specific/solinas64_2e127m1/femul.c
index e38dc1016..4d53c606f 100644
--- a/src/Specific/solinas64_2e127m1/femul.c
+++ b/src/Specific/solinas64_2e127m1/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + ((uint128_t)x6 * x10));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x2b);
-{ uint64_t x16 = ((uint64_t)x14 & 0x7ffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x2a);
-{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x2a);
-{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffff);
-{ uint64_t x23 = (x16 + x21);
-{ uint64_t x24 = (x23 >> 0x2b);
-{ uint64_t x25 = (x23 & 0x7ffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x2a);
-{ uint64_t x28 = (x26 & 0x3ffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) { // Multiplies the 3-limb inputs in1 and in2 column by column and carries the sums into out[0..2]; the 43/42/42-bit masks suggest the modulus 2^127 - 1 (per the directory name) — confirm
+ { const uint64_t x6 = in1[2]; // both operands are loaded from index 2 down to index 0
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9))); // column that ends up in out[2]; NOTE(review): uint128_t is not declared in this fragment — assumed to come from the including file
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + ((uint128_t)x6 * x10)); // column that ends up in out[1]
+ { uint128_t x14 = (((uint128_t)x5 * x9) + ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11)))); // column that ends up in out[0]
+ { uint64_t x15 = (uint64_t) (x14 >> 0x2b); // bits 43 and up of the low column become a carry
+ { uint64_t x16 = ((uint64_t)x14 & 0x7ffffffffff); // low 43 bits stay in the low column
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x2a); // bits 42 and up of the middle column become a carry
+ { uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffff); // low 42 bits stay in the middle column
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x2a); // bits 42 and up of the top column become a carry
+ { uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffff);
+ { uint64_t x23 = (x16 + x21); // the top column's carry is added back into the low column
+ { uint64_t x24 = (x23 >> 0x2b); // second carry pass, now in 64-bit arithmetic
+ { uint64_t x25 = (x23 & 0x7ffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x2a);
+ { uint64_t x28 = (x26 & 0x3ffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22); // the last carry is added in without a further mask
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e127m1/fesquare.c b/src/Specific/solinas64_2e127m1/fesquare.c
index 80e046893..3c6dcc648 100644
--- a/src/Specific/solinas64_2e127m1/fesquare.c
+++ b/src/Specific/solinas64_2e127m1/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((uint128_t)x3 * x3));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x2b);
-{ uint64_t x9 = ((uint64_t)x7 & 0x7ffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x2a);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2a);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffff);
-{ uint64_t x16 = (x9 + x14);
-{ uint64_t x17 = (x16 >> 0x2b);
-{ uint64_t x18 = (x16 & 0x7ffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x2a);
-{ uint64_t x21 = (x19 & 0x3ffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) { // Multiplies the 3-limb input in1 by itself column by column and carries the sums into out[0..2]; the 43/42/42-bit masks suggest the modulus 2^127 - 1 (per the directory name) — confirm
+ { const uint64_t x3 = in1[2]; // the operand is loaded from index 2 down to index 0
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2))); // column that ends up in out[2]; NOTE(review): uint128_t is not declared in this fragment — assumed to come from the including file
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((uint128_t)x3 * x3)); // column that ends up in out[1]
+ { uint128_t x7 = (((uint128_t)x2 * x2) + ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4)))); // column that ends up in out[0]
+ { uint64_t x8 = (uint64_t) (x7 >> 0x2b); // bits 43 and up of the low column become a carry
+ { uint64_t x9 = ((uint64_t)x7 & 0x7ffffffffff); // low 43 bits stay in the low column
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x2a); // bits 42 and up of the middle column become a carry
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffff); // low 42 bits stay in the middle column
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2a); // bits 42 and up of the top column become a carry
+ { uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffff);
+ { uint64_t x16 = (x9 + x14); // the top column's carry is added back into the low column
+ { uint64_t x17 = (x16 >> 0x2b); // second carry pass, now in 64-bit arithmetic
+ { uint64_t x18 = (x16 & 0x7ffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x2a);
+ { uint64_t x21 = (x19 & 0x3ffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15); // the last carry is added in without a further mask
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e127m1/freeze.c b/src/Specific/solinas64_2e127m1/freeze.c
index ae530ce17..7f3ddd4fe 100644
--- a/src/Specific/solinas64_2e127m1/freeze.c
+++ b/src/Specific/solinas64_2e127m1/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 43 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffff;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) { // Subtracts the constants 0x7ffffffffff / 0x3ffffffffff / 0x3ffffffffff from the three limbs of in1 with a borrow chain, then adds them back under a mask derived from the final borrow; the constants look like the limbs of 2^127 - 1 (per the directory name) — confirm
+ { const uint64_t x3 = in1[2]; // inputs are loaded from index 2 down to index 0
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffff); // NOTE(review): 'Op (Syntax...)' is not C syntax — looks like un-lowered generator output; presumably (borrow_in, minuend, subtrahend) -> (difference, borrow_out) at the stated bit width — confirm
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x3ffffffffff); // each later step takes the previous step's second result as its first argument
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x3ffffffffff); // x13 is the last borrow-style output of the chain
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask: presumably 0 when x13 is zero and all-ones otherwise — confirm cmovznz semantics (defined outside this file)
+ { uint64_t x15 = (x14 & 0x7ffffffffff); // same constant as the matching subtraction step, kept or zeroed by the mask
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15); // add-back chain: previous step's second result, the earlier difference, and the masked constant
+ { uint64_t x19 = (x14 & 0x3ffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x3ffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23); // the second result of the last step is bound to '_' and never used
+ out[0] = x17; // out[i] receives the first result of the i-th add-back step
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e129m25/femul.c b/src/Specific/solinas64_2e129m25/femul.c
index 85658de00..f4d7f2f96 100644
--- a/src/Specific/solinas64_2e129m25/femul.c
+++ b/src/Specific/solinas64_2e129m25/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x19 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x19 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x2b);
-{ uint64_t x16 = ((uint64_t)x14 & 0x7ffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x2b);
-{ uint64_t x19 = ((uint64_t)x17 & 0x7ffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x2b);
-{ uint64_t x22 = ((uint64_t)x20 & 0x7ffffffffff);
-{ uint64_t x23 = (x16 + (0x19 * x21));
-{ uint64_t x24 = (x23 >> 0x2b);
-{ uint64_t x25 = (x23 & 0x7ffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x2b);
-{ uint64_t x28 = (x26 & 0x7ffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x19 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x19 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint64_t x15 = (uint64_t) (x14 >> 0x2b);
+ { uint64_t x16 = ((uint64_t)x14 & 0x7ffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x2b);
+ { uint64_t x19 = ((uint64_t)x17 & 0x7ffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x2b);
+ { uint64_t x22 = ((uint64_t)x20 & 0x7ffffffffff);
+ { uint64_t x23 = (x16 + (0x19 * x21));
+ { uint64_t x24 = (x23 >> 0x2b);
+ { uint64_t x25 = (x23 & 0x7ffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x2b);
+ { uint64_t x28 = (x26 & 0x7ffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e129m25/fesquare.c b/src/Specific/solinas64_2e129m25/fesquare.c
index 4bc1e1a75..e46051319 100644
--- a/src/Specific/solinas64_2e129m25/fesquare.c
+++ b/src/Specific/solinas64_2e129m25/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x19 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x2b);
-{ uint64_t x9 = ((uint64_t)x7 & 0x7ffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x2b);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2b);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffff);
-{ uint64_t x16 = (x9 + (0x19 * x14));
-{ uint64_t x17 = (x16 >> 0x2b);
-{ uint64_t x18 = (x16 & 0x7ffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x2b);
-{ uint64_t x21 = (x19 & 0x7ffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x19 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint64_t x8 = (uint64_t) (x7 >> 0x2b);
+ { uint64_t x9 = ((uint64_t)x7 & 0x7ffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x2b);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7ffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2b);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffff);
+ { uint64_t x16 = (x9 + (0x19 * x14));
+ { uint64_t x17 = (x16 >> 0x2b);
+ { uint64_t x18 = (x16 & 0x7ffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x2b);
+ { uint64_t x21 = (x19 & 0x7ffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e129m25/freeze.c b/src/Specific/solinas64_2e129m25/freeze.c
index 30c6c2027..dafc25f97 100644
--- a/src/Specific/solinas64_2e129m25/freeze.c
+++ b/src/Specific/solinas64_2e129m25/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 43 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffe7;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffe7);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x7ffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x7ffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x7ffffffffe7);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x7ffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x7ffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e130m5/femul.c b/src/Specific/solinas64_2e130m5/femul.c
index db6eb8634..a14294d6d 100644
--- a/src/Specific/solinas64_2e130m5/femul.c
+++ b/src/Specific/solinas64_2e130m5/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11)))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x2c);
-{ uint64_t x16 = ((uint64_t)x14 & 0xfffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x2b);
-{ uint64_t x19 = ((uint64_t)x17 & 0x7ffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x2b);
-{ uint64_t x22 = ((uint64_t)x20 & 0x7ffffffffff);
-{ uint64_t x23 = (x16 + (0x5 * x21));
-{ uint64_t x24 = (x23 >> 0x2c);
-{ uint64_t x25 = (x23 & 0xfffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x2b);
-{ uint64_t x28 = (x26 & 0x7ffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11)))));
+ { uint64_t x15 = (uint64_t) (x14 >> 0x2c);
+ { uint64_t x16 = ((uint64_t)x14 & 0xfffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x2b);
+ { uint64_t x19 = ((uint64_t)x17 & 0x7ffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x2b);
+ { uint64_t x22 = ((uint64_t)x20 & 0x7ffffffffff);
+ { uint64_t x23 = (x16 + (0x5 * x21));
+ { uint64_t x24 = (x23 >> 0x2c);
+ { uint64_t x25 = (x23 & 0xfffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x2b);
+ { uint64_t x28 = (x26 & 0x7ffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e130m5/fesquare.c b/src/Specific/solinas64_2e130m5/fesquare.c
index d55560f0c..499c28d6d 100644
--- a/src/Specific/solinas64_2e130m5/fesquare.c
+++ b/src/Specific/solinas64_2e130m5/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4)))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x2c);
-{ uint64_t x9 = ((uint64_t)x7 & 0xfffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x2b);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2b);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffff);
-{ uint64_t x16 = (x9 + (0x5 * x14));
-{ uint64_t x17 = (x16 >> 0x2c);
-{ uint64_t x18 = (x16 & 0xfffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x2b);
-{ uint64_t x21 = (x19 & 0x7ffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4)))));
+ { uint64_t x8 = (uint64_t) (x7 >> 0x2c);
+ { uint64_t x9 = ((uint64_t)x7 & 0xfffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x2b);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7ffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2b);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffff);
+ { uint64_t x16 = (x9 + (0x5 * x14));
+ { uint64_t x17 = (x16 >> 0x2c);
+ { uint64_t x18 = (x16 & 0xfffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x2b);
+ { uint64_t x21 = (x19 & 0x7ffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e130m5/freeze.c b/src/Specific/solinas64_2e130m5/freeze.c
index 3ee95bba8..d6ee6998a 100644
--- a/src/Specific/solinas64_2e130m5/freeze.c
+++ b/src/Specific/solinas64_2e130m5/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 44 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffb;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 44 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffb);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x7ffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x7ffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xffffffffffb);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 44 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x7ffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x7ffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e137m13/femul.c b/src/Specific/solinas64_2e137m13/femul.c
index 085d80ee8..d68990736 100644
--- a/src/Specific/solinas64_2e137m13/femul.c
+++ b/src/Specific/solinas64_2e137m13/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xd * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xd * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xd * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x23);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x22);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x22);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x22);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3ffffffff);
-{ uint64_t x31 = (x21 + (0xd * x29));
-{ uint64_t x32 = (x31 >> 0x23);
-{ uint64_t x33 = (x31 & 0x7ffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x22);
-{ uint64_t x36 = (x34 & 0x3ffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xd * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xd * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0xd * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x23);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x22);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3ffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x22);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x22);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3ffffffff);
+ { uint64_t x31 = (x21 + (0xd * x29));
+ { uint64_t x32 = (x31 >> 0x23);
+ { uint64_t x33 = (x31 & 0x7ffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x22);
+ { uint64_t x36 = (x34 & 0x3ffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e137m13/fesquare.c b/src/Specific/solinas64_2e137m13/fesquare.c
index 6858e3e57..ea414891a 100644
--- a/src/Specific/solinas64_2e137m13/fesquare.c
+++ b/src/Specific/solinas64_2e137m13/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xd * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xd * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xd * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x23);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x22);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x22);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x22);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffff);
-{ uint64_t x22 = (x12 + (0xd * x20));
-{ uint64_t x23 = (x22 >> 0x23);
-{ uint64_t x24 = (x22 & 0x7ffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x22);
-{ uint64_t x27 = (x25 & 0x3ffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xd * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xd * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0xd * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x23);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7ffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x22);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3ffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x22);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x22);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3ffffffff);
+ { uint64_t x22 = (x12 + (0xd * x20));
+ { uint64_t x23 = (x22 >> 0x23);
+ { uint64_t x24 = (x22 & 0x7ffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x22);
+ { uint64_t x27 = (x25 & 0x3ffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e137m13/freeze.c b/src/Specific/solinas64_2e137m13/freeze.c
index d4ed86806..483386c55 100644
--- a/src/Specific/solinas64_2e137m13/freeze.c
+++ b/src/Specific/solinas64_2e137m13/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 35 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffff3;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffff3);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 34 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x3ffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 34 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x3ffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 34 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x3ffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x7fffffff3);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x3ffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 34 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x3ffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 34 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x3ffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 34 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e140m27/femul.c b/src/Specific/solinas64_2e140m27/femul.c
index 7f824389c..60bd565a7 100644
--- a/src/Specific/solinas64_2e140m27/femul.c
+++ b/src/Specific/solinas64_2e140m27/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x1b * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1b * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x1b * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x23);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x23);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x23);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x23);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffff);
-{ uint64_t x31 = (x21 + (0x1b * x29));
-{ uint64_t x32 = (x31 >> 0x23);
-{ uint64_t x33 = (x31 & 0x7ffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x23);
-{ uint64_t x36 = (x34 & 0x7ffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x1b * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1b * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x1b * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x23);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x23);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x23);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7ffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x23);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffff);
+ { uint64_t x31 = (x21 + (0x1b * x29));
+ { uint64_t x32 = (x31 >> 0x23);
+ { uint64_t x33 = (x31 & 0x7ffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x23);
+ { uint64_t x36 = (x34 & 0x7ffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e140m27/fesquare.c b/src/Specific/solinas64_2e140m27/fesquare.c
index ce19267d7..d99bc0584 100644
--- a/src/Specific/solinas64_2e140m27/fesquare.c
+++ b/src/Specific/solinas64_2e140m27/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1b * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1b * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x1b * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x23);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x23);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x23);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x23);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff);
-{ uint64_t x22 = (x12 + (0x1b * x20));
-{ uint64_t x23 = (x22 >> 0x23);
-{ uint64_t x24 = (x22 & 0x7ffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x23);
-{ uint64_t x27 = (x25 & 0x7ffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1b * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1b * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x1b * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x23);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7ffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x23);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x23);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7ffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x23);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff);
+ { uint64_t x22 = (x12 + (0x1b * x20));
+ { uint64_t x23 = (x22 >> 0x23);
+ { uint64_t x24 = (x22 & 0x7ffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x23);
+ { uint64_t x27 = (x25 & 0x7ffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e140m27/freeze.c b/src/Specific/solinas64_2e140m27/freeze.c
index 5423b0cdf..f6b72ec72 100644
--- a/src/Specific/solinas64_2e140m27/freeze.c
+++ b/src/Specific/solinas64_2e140m27/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 35 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffe5;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffe5);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x7ffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x7ffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x7ffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x7ffffffe5);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7ffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x7ffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7ffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 35 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e141m9/femul.c b/src/Specific/solinas64_2e141m9/femul.c
index 41f04986c..adfd592ca 100644
--- a/src/Specific/solinas64_2e141m9/femul.c
+++ b/src/Specific/solinas64_2e141m9/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x9 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x9 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x2f);
-{ uint64_t x16 = ((uint64_t)x14 & 0x7fffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x2f);
-{ uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x2f);
-{ uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffff);
-{ uint64_t x23 = (x16 + (0x9 * x21));
-{ uint64_t x24 = (x23 >> 0x2f);
-{ uint64_t x25 = (x23 & 0x7fffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x2f);
-{ uint64_t x28 = (x26 & 0x7fffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x9 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x9 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint64_t x15 = (uint64_t) (x14 >> 0x2f);
+ { uint64_t x16 = ((uint64_t)x14 & 0x7fffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x2f);
+ { uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x2f);
+ { uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffff);
+ { uint64_t x23 = (x16 + (0x9 * x21));
+ { uint64_t x24 = (x23 >> 0x2f);
+ { uint64_t x25 = (x23 & 0x7fffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x2f);
+ { uint64_t x28 = (x26 & 0x7fffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e141m9/fesquare.c b/src/Specific/solinas64_2e141m9/fesquare.c
index 9152de53a..ddcbf6334 100644
--- a/src/Specific/solinas64_2e141m9/fesquare.c
+++ b/src/Specific/solinas64_2e141m9/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x9 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x2f);
-{ uint64_t x9 = ((uint64_t)x7 & 0x7fffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x2f);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2f);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
-{ uint64_t x16 = (x9 + (0x9 * x14));
-{ uint64_t x17 = (x16 >> 0x2f);
-{ uint64_t x18 = (x16 & 0x7fffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x2f);
-{ uint64_t x21 = (x19 & 0x7fffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x9 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint64_t x8 = (uint64_t) (x7 >> 0x2f);
+ { uint64_t x9 = ((uint64_t)x7 & 0x7fffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x2f);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2f);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
+ { uint64_t x16 = (x9 + (0x9 * x14));
+ { uint64_t x17 = (x16 >> 0x2f);
+ { uint64_t x18 = (x16 & 0x7fffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x2f);
+ { uint64_t x21 = (x19 & 0x7fffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e141m9/freeze.c b/src/Specific/solinas64_2e141m9/freeze.c
index 97be73608..b4529db52 100644
--- a/src/Specific/solinas64_2e141m9/freeze.c
+++ b/src/Specific/solinas64_2e141m9/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 47 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffff7;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffff7);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x7fffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x7fffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x7ffffffffff7);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x7fffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x7fffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e150m3/femul.c b/src/Specific/solinas64_2e150m3/femul.c
index 92776a173..667c4cdea 100644
--- a/src/Specific/solinas64_2e150m3/femul.c
+++ b/src/Specific/solinas64_2e150m3/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x3 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x3 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x32);
-{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x32);
-{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x32);
-{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffff);
-{ uint64_t x23 = (x16 + (0x3 * x21));
-{ uint64_t x24 = (x23 >> 0x32);
-{ uint64_t x25 = (x23 & 0x3ffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x32);
-{ uint64_t x28 = (x26 & 0x3ffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x3 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x3 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint64_t x15 = (uint64_t) (x14 >> 0x32);
+ { uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x32);
+ { uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x32);
+ { uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffff);
+ { uint64_t x23 = (x16 + (0x3 * x21));
+ { uint64_t x24 = (x23 >> 0x32);
+ { uint64_t x25 = (x23 & 0x3ffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x32);
+ { uint64_t x28 = (x26 & 0x3ffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e150m3/fesquare.c b/src/Specific/solinas64_2e150m3/fesquare.c
index 43701ab9e..1124db94f 100644
--- a/src/Specific/solinas64_2e150m3/fesquare.c
+++ b/src/Specific/solinas64_2e150m3/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x32);
-{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x32);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x32);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffff);
-{ uint64_t x16 = (x9 + (0x3 * x14));
-{ uint64_t x17 = (x16 >> 0x32);
-{ uint64_t x18 = (x16 & 0x3ffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x32);
-{ uint64_t x21 = (x19 & 0x3ffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint64_t x8 = (uint64_t) (x7 >> 0x32);
+ { uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x32);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x32);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffff);
+ { uint64_t x16 = (x9 + (0x3 * x14));
+ { uint64_t x17 = (x16 >> 0x32);
+ { uint64_t x18 = (x16 & 0x3ffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x32);
+ { uint64_t x21 = (x19 & 0x3ffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e150m3/freeze.c b/src/Specific/solinas64_2e150m3/freeze.c
index 5e3a4ce07..b0014eb20 100644
--- a/src/Specific/solinas64_2e150m3/freeze.c
+++ b/src/Specific/solinas64_2e150m3/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffffffd;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffffffd);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x3ffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x3ffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x3fffffffffffd);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x3ffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x3ffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e150m5/femul.c b/src/Specific/solinas64_2e150m5/femul.c
index 959821713..d79487ffe 100644
--- a/src/Specific/solinas64_2e150m5/femul.c
+++ b/src/Specific/solinas64_2e150m5/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x32);
-{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x32);
-{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x32);
-{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffff);
-{ uint64_t x23 = (x16 + (0x5 * x21));
-{ uint64_t x24 = (x23 >> 0x32);
-{ uint64_t x25 = (x23 & 0x3ffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x32);
-{ uint64_t x28 = (x26 & 0x3ffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint64_t x15 = (uint64_t) (x14 >> 0x32);
+ { uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x32);
+ { uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x32);
+ { uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffff);
+ { uint64_t x23 = (x16 + (0x5 * x21));
+ { uint64_t x24 = (x23 >> 0x32);
+ { uint64_t x25 = (x23 & 0x3ffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x32);
+ { uint64_t x28 = (x26 & 0x3ffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e150m5/fesquare.c b/src/Specific/solinas64_2e150m5/fesquare.c
index 46ebd9428..f3c699e60 100644
--- a/src/Specific/solinas64_2e150m5/fesquare.c
+++ b/src/Specific/solinas64_2e150m5/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x32);
-{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x32);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x32);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffff);
-{ uint64_t x16 = (x9 + (0x5 * x14));
-{ uint64_t x17 = (x16 >> 0x32);
-{ uint64_t x18 = (x16 & 0x3ffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x32);
-{ uint64_t x21 = (x19 & 0x3ffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint64_t x8 = (uint64_t) (x7 >> 0x32);
+ { uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x32);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x32);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffff);
+ { uint64_t x16 = (x9 + (0x5 * x14));
+ { uint64_t x17 = (x16 >> 0x32);
+ { uint64_t x18 = (x16 & 0x3ffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x32);
+ { uint64_t x21 = (x19 & 0x3ffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e150m5/freeze.c b/src/Specific/solinas64_2e150m5/freeze.c
index b6f47f0b9..69d637ccb 100644
--- a/src/Specific/solinas64_2e150m5/freeze.c
+++ b/src/Specific/solinas64_2e150m5/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffffffb;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffffffb);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x3ffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x3ffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x3fffffffffffb);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x3ffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x3ffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e152m17/femul.c b/src/Specific/solinas64_2e152m17/femul.c
index d3005d0da..c351efd1e 100644
--- a/src/Specific/solinas64_2e152m17/femul.c
+++ b/src/Specific/solinas64_2e152m17/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x11 * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x11 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x11 * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x26);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x26);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x26);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x26);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffff);
-{ uint64_t x31 = (x21 + (0x11 * x29));
-{ uint64_t x32 = (x31 >> 0x26);
-{ uint64_t x33 = (x31 & 0x3fffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x26);
-{ uint64_t x36 = (x34 & 0x3fffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x11 * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x11 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x11 * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x26);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x26);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3fffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x26);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3fffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x26);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3fffffffff);
+ { uint64_t x31 = (x21 + (0x11 * x29));
+ { uint64_t x32 = (x31 >> 0x26);
+ { uint64_t x33 = (x31 & 0x3fffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x26);
+ { uint64_t x36 = (x34 & 0x3fffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e152m17/fesquare.c b/src/Specific/solinas64_2e152m17/fesquare.c
index f265f13d1..d8cf4dafa 100644
--- a/src/Specific/solinas64_2e152m17/fesquare.c
+++ b/src/Specific/solinas64_2e152m17/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x26);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3fffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x26);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3fffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x26);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3fffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x26);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
-{ uint64_t x22 = (x12 + (0x11 * x20));
-{ uint64_t x23 = (x22 >> 0x26);
-{ uint64_t x24 = (x22 & 0x3fffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x26);
-{ uint64_t x27 = (x25 & 0x3fffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x26);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3fffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x26);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3fffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x26);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3fffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x26);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
+ { uint64_t x22 = (x12 + (0x11 * x20));
+ { uint64_t x23 = (x22 >> 0x26);
+ { uint64_t x24 = (x22 & 0x3fffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x26);
+ { uint64_t x27 = (x25 & 0x3fffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e152m17/freeze.c b/src/Specific/solinas64_2e152m17/freeze.c
index 7ca849a69..0a08e7145 100644
--- a/src/Specific/solinas64_2e152m17/freeze.c
+++ b/src/Specific/solinas64_2e152m17/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 38 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffef;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffef);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x3fffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x3fffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x3fffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x3fffffffef);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x3fffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x3fffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x3fffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e158m15/femul.c b/src/Specific/solinas64_2e158m15/femul.c
index fe126f9a5..6511a391c 100644
--- a/src/Specific/solinas64_2e158m15/femul.c
+++ b/src/Specific/solinas64_2e158m15/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xf * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xf * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xf * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x28);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x27);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x28);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x27);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffff);
-{ uint64_t x31 = (x21 + (0xf * x29));
-{ uint64_t x32 = (x31 >> 0x28);
-{ uint64_t x33 = (x31 & 0xffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x27);
-{ uint64_t x36 = (x34 & 0x7fffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xf * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xf * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0xf * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x28);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x27);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x28);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x27);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffff);
+ { uint64_t x31 = (x21 + (0xf * x29));
+ { uint64_t x32 = (x31 >> 0x28);
+ { uint64_t x33 = (x31 & 0xffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x27);
+ { uint64_t x36 = (x34 & 0x7fffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e158m15/fesquare.c b/src/Specific/solinas64_2e158m15/fesquare.c
index a400d508b..7cce9e8d1 100644
--- a/src/Specific/solinas64_2e158m15/fesquare.c
+++ b/src/Specific/solinas64_2e158m15/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xf * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xf * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x28);
-{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x27);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x28);
-{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x27);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffff);
-{ uint64_t x22 = (x12 + (0xf * x20));
-{ uint64_t x23 = (x22 >> 0x28);
-{ uint64_t x24 = (x22 & 0xffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x27);
-{ uint64_t x27 = (x25 & 0x7fffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xf * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0xf * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x28);
+ { uint64_t x12 = ((uint64_t)x10 & 0xffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x27);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x28);
+ { uint64_t x18 = ((uint64_t)x16 & 0xffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x27);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffff);
+ { uint64_t x22 = (x12 + (0xf * x20));
+ { uint64_t x23 = (x22 >> 0x28);
+ { uint64_t x24 = (x22 & 0xffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x27);
+ { uint64_t x27 = (x25 & 0x7fffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e158m15/freeze.c b/src/Specific/solinas64_2e158m15/freeze.c
index 46dd0dec9..cf4f4ba9c 100644
--- a/src/Specific/solinas64_2e158m15/freeze.c
+++ b/src/Specific/solinas64_2e158m15/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 40 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffff1;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffff1);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x7fffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0xffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x7fffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffff1);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7fffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0xffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7fffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e165m25/femul.c b/src/Specific/solinas64_2e165m25/femul.c
index a737e0bac..79b5c48a0 100644
--- a/src/Specific/solinas64_2e165m25/femul.c
+++ b/src/Specific/solinas64_2e165m25/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x19 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x19 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint128_t x15 = (x14 >> 0x37);
-{ uint64_t x16 = ((uint64_t)x14 & 0x7fffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x37);
-{ uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x37);
-{ uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffffff);
-{ uint128_t x23 = (x16 + ((uint128_t)0x19 * x21));
-{ uint64_t x24 = (uint64_t) (x23 >> 0x37);
-{ uint64_t x25 = ((uint64_t)x23 & 0x7fffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x37);
-{ uint64_t x28 = (x26 & 0x7fffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x19 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x19 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint128_t x15 = (x14 >> 0x37);
+ { uint64_t x16 = ((uint64_t)x14 & 0x7fffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x37);
+ { uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x37);
+ { uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffffff);
+ { uint128_t x23 = (x16 + ((uint128_t)0x19 * x21));
+ { uint64_t x24 = (uint64_t) (x23 >> 0x37);
+ { uint64_t x25 = ((uint64_t)x23 & 0x7fffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x37);
+ { uint64_t x28 = (x26 & 0x7fffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e165m25/fesquare.c b/src/Specific/solinas64_2e165m25/fesquare.c
index 34b1fe082..4920e64b1 100644
--- a/src/Specific/solinas64_2e165m25/fesquare.c
+++ b/src/Specific/solinas64_2e165m25/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x19 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint128_t x8 = (x7 >> 0x37);
-{ uint64_t x9 = ((uint64_t)x7 & 0x7fffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x37);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x37);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
-{ uint128_t x16 = (x9 + ((uint128_t)0x19 * x14));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x37);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x37);
-{ uint64_t x21 = (x19 & 0x7fffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x19 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint128_t x8 = (x7 >> 0x37);
+ { uint64_t x9 = ((uint64_t)x7 & 0x7fffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x37);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x37);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+ { uint128_t x16 = (x9 + ((uint128_t)0x19 * x14));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x37);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x37);
+ { uint64_t x21 = (x19 & 0x7fffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e165m25/freeze.c b/src/Specific/solinas64_2e165m25/freeze.c
index 8204e0075..36bdf6e07 100644
--- a/src/Specific/solinas64_2e165m25/freeze.c
+++ b/src/Specific/solinas64_2e165m25/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffffffffe7;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffffffffe7);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x7fffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x7fffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x7fffffffffffe7);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x7fffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x7fffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e166m5/femul.c b/src/Specific/solinas64_2e166m5/femul.c
index 77c28c681..dc6d5fea4 100644
--- a/src/Specific/solinas64_2e166m5/femul.c
+++ b/src/Specific/solinas64_2e166m5/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11)))));
-{ uint64_t x15 = (uint64_t) (x14 >> 0x38);
-{ uint64_t x16 = ((uint64_t)x14 & 0xffffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x37);
-{ uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x37);
-{ uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffffff);
-{ uint64_t x23 = (x16 + (0x5 * x21));
-{ uint64_t x24 = (x23 >> 0x38);
-{ uint64_t x25 = (x23 & 0xffffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x37);
-{ uint64_t x28 = (x26 & 0x7fffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11)))));
+ { uint64_t x15 = (uint64_t) (x14 >> 0x38);
+ { uint64_t x16 = ((uint64_t)x14 & 0xffffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x37);
+ { uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x37);
+ { uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffffff);
+ { uint64_t x23 = (x16 + (0x5 * x21));
+ { uint64_t x24 = (x23 >> 0x38);
+ { uint64_t x25 = (x23 & 0xffffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x37);
+ { uint64_t x28 = (x26 & 0x7fffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e166m5/fesquare.c b/src/Specific/solinas64_2e166m5/fesquare.c
index 983b03769..ba2f12c7e 100644
--- a/src/Specific/solinas64_2e166m5/fesquare.c
+++ b/src/Specific/solinas64_2e166m5/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4)))));
-{ uint64_t x8 = (uint64_t) (x7 >> 0x38);
-{ uint64_t x9 = ((uint64_t)x7 & 0xffffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x37);
-{ uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x37);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
-{ uint64_t x16 = (x9 + (0x5 * x14));
-{ uint64_t x17 = (x16 >> 0x38);
-{ uint64_t x18 = (x16 & 0xffffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x37);
-{ uint64_t x21 = (x19 & 0x7fffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4)))));
+ { uint64_t x8 = (uint64_t) (x7 >> 0x38);
+ { uint64_t x9 = ((uint64_t)x7 & 0xffffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x37);
+ { uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x37);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+ { uint64_t x16 = (x9 + (0x5 * x14));
+ { uint64_t x17 = (x16 >> 0x38);
+ { uint64_t x18 = (x16 & 0xffffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x37);
+ { uint64_t x21 = (x19 & 0x7fffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e166m5/freeze.c b/src/Specific/solinas64_2e166m5/freeze.c
index 1777c507c..d5c6575a3 100644
--- a/src/Specific/solinas64_2e166m5/freeze.c
+++ b/src/Specific/solinas64_2e166m5/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffffb;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffffb);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x7fffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x7fffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0xfffffffffffffb);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x7fffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x7fffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e171m19/femul.c b/src/Specific/solinas64_2e171m19/femul.c
index a8f1a1a43..fef824bff 100644
--- a/src/Specific/solinas64_2e171m19/femul.c
+++ b/src/Specific/solinas64_2e171m19/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x13 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x13 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint128_t x15 = (x14 >> 0x39);
-{ uint64_t x16 = ((uint64_t)x14 & 0x1ffffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint128_t x18 = (x17 >> 0x39);
-{ uint64_t x19 = ((uint64_t)x17 & 0x1ffffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x39);
-{ uint64_t x22 = ((uint64_t)x20 & 0x1ffffffffffffff);
-{ uint128_t x23 = (x16 + ((uint128_t)0x13 * x21));
-{ uint64_t x24 = (uint64_t) (x23 >> 0x39);
-{ uint64_t x25 = ((uint64_t)x23 & 0x1ffffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x39);
-{ uint64_t x28 = (x26 & 0x1ffffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x13 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x13 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint128_t x15 = (x14 >> 0x39);
+ { uint64_t x16 = ((uint64_t)x14 & 0x1ffffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint128_t x18 = (x17 >> 0x39);
+ { uint64_t x19 = ((uint64_t)x17 & 0x1ffffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x39);
+ { uint64_t x22 = ((uint64_t)x20 & 0x1ffffffffffffff);
+ { uint128_t x23 = (x16 + ((uint128_t)0x13 * x21));
+ { uint64_t x24 = (uint64_t) (x23 >> 0x39);
+ { uint64_t x25 = ((uint64_t)x23 & 0x1ffffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x39);
+ { uint64_t x28 = (x26 & 0x1ffffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e171m19/fesquare.c b/src/Specific/solinas64_2e171m19/fesquare.c
index 2b64141cb..baf29c300 100644
--- a/src/Specific/solinas64_2e171m19/fesquare.c
+++ b/src/Specific/solinas64_2e171m19/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x13 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint128_t x8 = (x7 >> 0x39);
-{ uint64_t x9 = ((uint64_t)x7 & 0x1ffffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint128_t x11 = (x10 >> 0x39);
-{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x39);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
-{ uint128_t x16 = (x9 + ((uint128_t)0x13 * x14));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x39);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x39);
-{ uint64_t x21 = (x19 & 0x1ffffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x13 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint128_t x8 = (x7 >> 0x39);
+ { uint64_t x9 = ((uint64_t)x7 & 0x1ffffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint128_t x11 = (x10 >> 0x39);
+ { uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x39);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
+ { uint128_t x16 = (x9 + ((uint128_t)0x13 * x14));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x39);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x39);
+ { uint64_t x21 = (x19 & 0x1ffffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e171m19/freeze.c b/src/Specific/solinas64_2e171m19/freeze.c
index d394e0b1a..467ff2338 100644
--- a/src/Specific/solinas64_2e171m19/freeze.c
+++ b/src/Specific/solinas64_2e171m19/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffffffffffed;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffffffffed);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x1ffffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x1ffffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x1ffffffffffffed);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x1ffffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x1ffffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e174m17/femul.c b/src/Specific/solinas64_2e174m17/femul.c
index 23271b84a..fdf07cf4c 100644
--- a/src/Specific/solinas64_2e174m17/femul.c
+++ b/src/Specific/solinas64_2e174m17/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x11 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x11 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint128_t x15 = (x14 >> 0x3a);
-{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint128_t x18 = (x17 >> 0x3a);
-{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x3a);
-{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffffff);
-{ uint128_t x23 = (x16 + ((uint128_t)0x11 * x21));
-{ uint64_t x24 = (uint64_t) (x23 >> 0x3a);
-{ uint64_t x25 = ((uint64_t)x23 & 0x3ffffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x3a);
-{ uint64_t x28 = (x26 & 0x3ffffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x11 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x11 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint128_t x15 = (x14 >> 0x3a);
+ { uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint128_t x18 = (x17 >> 0x3a);
+ { uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x3a);
+ { uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffffff);
+ { uint128_t x23 = (x16 + ((uint128_t)0x11 * x21));
+ { uint64_t x24 = (uint64_t) (x23 >> 0x3a);
+ { uint64_t x25 = ((uint64_t)x23 & 0x3ffffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x3a);
+ { uint64_t x28 = (x26 & 0x3ffffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e174m17/fesquare.c b/src/Specific/solinas64_2e174m17/fesquare.c
index 1df201c99..1364215bc 100644
--- a/src/Specific/solinas64_2e174m17/fesquare.c
+++ b/src/Specific/solinas64_2e174m17/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint128_t x8 = (x7 >> 0x3a);
-{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint128_t x11 = (x10 >> 0x3a);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x3a);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffffff);
-{ uint128_t x16 = (x9 + ((uint128_t)0x11 * x14));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x3a);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x3a);
-{ uint64_t x21 = (x19 & 0x3ffffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint128_t x8 = (x7 >> 0x3a);
+ { uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint128_t x11 = (x10 >> 0x3a);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x3a);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffffff);
+ { uint128_t x16 = (x9 + ((uint128_t)0x11 * x14));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x3a);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x3a);
+ { uint64_t x21 = (x19 & 0x3ffffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e174m17/freeze.c b/src/Specific/solinas64_2e174m17/freeze.c
index af0c7eacb..812f230b3 100644
--- a/src/Specific/solinas64_2e174m17/freeze.c
+++ b/src/Specific/solinas64_2e174m17/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 58 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffffffffef;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffffffffef);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x3ffffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x3ffffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x3ffffffffffffef);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x3ffffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x3ffffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e174m3/femul.c b/src/Specific/solinas64_2e174m3/femul.c
index 4b25d07dd..25915ef8a 100644
--- a/src/Specific/solinas64_2e174m3/femul.c
+++ b/src/Specific/solinas64_2e174m3/femul.c
@@ -1,41 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
-{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
-{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x3 * ((uint128_t)x6 * x10)));
-{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x3 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
-{ uint128_t x15 = (x14 >> 0x3a);
-{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffffff);
-{ uint128_t x17 = (x15 + x13);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x3a);
-{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffffff);
-{ uint128_t x20 = (x18 + x12);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x3a);
-{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffffff);
-{ uint128_t x23 = (x16 + ((uint128_t)0x3 * x21));
-{ uint64_t x24 = (uint64_t) (x23 >> 0x3a);
-{ uint64_t x25 = ((uint64_t)x23 & 0x3ffffffffffffff);
-{ uint64_t x26 = (x24 + x19);
-{ uint64_t x27 = (x26 >> 0x3a);
-{ uint64_t x28 = (x26 & 0x3ffffffffffffff);
-out[0] = x27 + x22;
-out[1] = x28;
-out[2] = x25;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void femul(uint64_t out[3], const uint64_t in1[3], const uint64_t in2[3]) {
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x10 = in2[2];
+ { const uint64_t x11 = in2[1];
+ { const uint64_t x9 = in2[0];
+ { uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9)));
+ { uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x3 * ((uint128_t)x6 * x10)));
+ { uint128_t x14 = (((uint128_t)x5 * x9) + (0x3 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11))));
+ { uint128_t x15 = (x14 >> 0x3a);
+ { uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffffff);
+ { uint128_t x17 = (x15 + x13);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x3a);
+ { uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffffff);
+ { uint128_t x20 = (x18 + x12);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x3a);
+ { uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffffff);
+ { uint128_t x23 = (x16 + ((uint128_t)0x3 * x21));
+ { uint64_t x24 = (uint64_t) (x23 >> 0x3a);
+ { uint64_t x25 = ((uint64_t)x23 & 0x3ffffffffffffff);
+ { uint64_t x26 = (x24 + x19);
+ { uint64_t x27 = (x26 >> 0x3a);
+ { uint64_t x28 = (x26 & 0x3ffffffffffffff);
+ out[0] = x25;
+ out[1] = x28;
+ out[2] = (x27 + x22);
+ }}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e174m3/fesquare.c b/src/Specific/solinas64_2e174m3/fesquare.c
index 15db1a41d..e4d94711a 100644
--- a/src/Specific/solinas64_2e174m3/fesquare.c
+++ b/src/Specific/solinas64_2e174m3/fesquare.c
@@ -1,41 +1,26 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
-{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * ((uint128_t)x3 * x3)));
-{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
-{ uint128_t x8 = (x7 >> 0x3a);
-{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffffff);
-{ uint128_t x10 = (x8 + x6);
-{ uint64_t x11 = (uint64_t) (x10 >> 0x3a);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
-{ uint128_t x13 = (x11 + x5);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x3a);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffffff);
-{ uint128_t x16 = (x9 + ((uint128_t)0x3 * x14));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x3a);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
-{ uint64_t x19 = (x17 + x12);
-{ uint64_t x20 = (x19 >> 0x3a);
-{ uint64_t x21 = (x19 & 0x3ffffffffffffff);
-out[0] = x20 + x15;
-out[1] = x21;
-out[2] = x18;
-}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[3];
+static void fesquare(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2)));
+ { uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * ((uint128_t)x3 * x3)));
+ { uint128_t x7 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4))));
+ { uint128_t x8 = (x7 >> 0x3a);
+ { uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffffff);
+ { uint128_t x10 = (x8 + x6);
+ { uint64_t x11 = (uint64_t) (x10 >> 0x3a);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
+ { uint128_t x13 = (x11 + x5);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x3a);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffffff);
+ { uint128_t x16 = (x9 + ((uint128_t)0x3 * x14));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x3a);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+ { uint64_t x19 = (x17 + x12);
+ { uint64_t x20 = (x19 >> 0x3a);
+ { uint64_t x21 = (x19 & 0x3ffffffffffffff);
+ out[0] = x18;
+ out[1] = x21;
+ out[2] = (x20 + x15);
+ }}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e174m3/freeze.c b/src/Specific/solinas64_2e174m3/freeze.c
index 57ba69349..1ac044bad 100644
--- a/src/Specific/solinas64_2e174m3/freeze.c
+++ b/src/Specific/solinas64_2e174m3/freeze.c
@@ -1,25 +1,19 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x6;
-out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 58 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffffffffd;;
+static void freeze(uint64_t out[3], const uint64_t in1[3]) {
+ { const uint64_t x3 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x6, uint8_t x7 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffffffffd);
+ { uint64_t x9, uint8_t x10 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x7, Return x4, 0x3ffffffffffffff);
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x10, Return x3, 0x3ffffffffffffff);
+ { uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL);
+ { uint64_t x15 = (x14 & 0x3fffffffffffffd);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x6, Return x15);
+ { uint64_t x19 = (x14 & 0x3ffffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x9, Return x19);
+ { uint64_t x23 = (x14 & 0x3ffffffffffffff);
+ { uint64_t x25, uint8_t _ = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x12, Return x23);
+ out[0] = x17;
+ out[1] = x21;
+ out[2] = x25;
+ }}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e189m25/femul.c b/src/Specific/solinas64_2e189m25/femul.c
index 02c646634..a7e14d4be 100644
--- a/src/Specific/solinas64_2e189m25/femul.c
+++ b/src/Specific/solinas64_2e189m25/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x19 * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x19 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x19 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x30);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x2f);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x2f);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x2f);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffff);
-{ uint64_t x31 = (x21 + (0x19 * x29));
-{ uint64_t x32 = (x31 >> 0x30);
-{ uint64_t x33 = (x31 & 0xffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x2f);
-{ uint64_t x36 = (x34 & 0x7fffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x19 * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x19 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x19 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x30);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x2f);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x2f);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x2f);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffff);
+ { uint64_t x31 = (x21 + (0x19 * x29));
+ { uint64_t x32 = (x31 >> 0x30);
+ { uint64_t x33 = (x31 & 0xffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x2f);
+ { uint64_t x36 = (x34 & 0x7fffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e189m25/fesquare.c b/src/Specific/solinas64_2e189m25/fesquare.c
index 5b1275f6e..15fd75f8c 100644
--- a/src/Specific/solinas64_2e189m25/fesquare.c
+++ b/src/Specific/solinas64_2e189m25/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x19 * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x19 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x30);
-{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2f);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x2f);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x2f);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
-{ uint64_t x22 = (x12 + (0x19 * x20));
-{ uint64_t x23 = (x22 >> 0x30);
-{ uint64_t x24 = (x22 & 0xffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x2f);
-{ uint64_t x27 = (x25 & 0x7fffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x19 * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x19 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x30);
+ { uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2f);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x2f);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x2f);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
+ { uint64_t x22 = (x12 + (0x19 * x20));
+ { uint64_t x23 = (x22 >> 0x30);
+ { uint64_t x24 = (x22 & 0xffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x2f);
+ { uint64_t x27 = (x25 & 0x7fffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e189m25/freeze.c b/src/Specific/solinas64_2e189m25/freeze.c
index 7c9edcf90..91d37e173 100644
--- a/src/Specific/solinas64_2e189m25/freeze.c
+++ b/src/Specific/solinas64_2e189m25/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffe7;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffe7);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x7fffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x7fffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x7fffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffe7);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7fffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x7fffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7fffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e190m11/femul.c b/src/Specific/solinas64_2e190m11/femul.c
index 6e06f6a3a..280a7eb48 100644
--- a/src/Specific/solinas64_2e190m11/femul.c
+++ b/src/Specific/solinas64_2e190m11/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xb * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xb * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xb * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x30);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x2f);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x30);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x2f);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffff);
-{ uint64_t x31 = (x21 + (0xb * x29));
-{ uint64_t x32 = (x31 >> 0x30);
-{ uint64_t x33 = (x31 & 0xffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x2f);
-{ uint64_t x36 = (x34 & 0x7fffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xb * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xb * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0xb * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x30);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x2f);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x30);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x2f);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffff);
+ { uint64_t x31 = (x21 + (0xb * x29));
+ { uint64_t x32 = (x31 >> 0x30);
+ { uint64_t x33 = (x31 & 0xffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x2f);
+ { uint64_t x36 = (x34 & 0x7fffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e190m11/fesquare.c b/src/Specific/solinas64_2e190m11/fesquare.c
index b6a920e95..b06d07e9d 100644
--- a/src/Specific/solinas64_2e190m11/fesquare.c
+++ b/src/Specific/solinas64_2e190m11/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xb * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xb * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xb * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x30);
-{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2f);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x30);
-{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x2f);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
-{ uint64_t x22 = (x12 + (0xb * x20));
-{ uint64_t x23 = (x22 >> 0x30);
-{ uint64_t x24 = (x22 & 0xffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x2f);
-{ uint64_t x27 = (x25 & 0x7fffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xb * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xb * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0xb * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x30);
+ { uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2f);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x30);
+ { uint64_t x18 = ((uint64_t)x16 & 0xffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x2f);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
+ { uint64_t x22 = (x12 + (0xb * x20));
+ { uint64_t x23 = (x22 >> 0x30);
+ { uint64_t x24 = (x22 & 0xffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x2f);
+ { uint64_t x27 = (x25 & 0x7fffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e190m11/freeze.c b/src/Specific/solinas64_2e190m11/freeze.c
index 9dffcedef..3ec01360f 100644
--- a/src/Specific/solinas64_2e190m11/freeze.c
+++ b/src/Specific/solinas64_2e190m11/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffff5;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffff5);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x7fffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0xffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x7fffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffff5);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7fffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0xffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7fffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e191m19/femul.c b/src/Specific/solinas64_2e191m19/femul.c
index 1283c97e0..ef49510ba 100644
--- a/src/Specific/solinas64_2e191m19/femul.c
+++ b/src/Specific/solinas64_2e191m19/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
-{ uint64_t x25 = (uint64_t) (x24 >> 0x27);
-{ uint64_t x26 = ((uint64_t)x24 & 0x7fffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x26);
-{ uint64_t x29 = ((uint64_t)x27 & 0x3fffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x26);
-{ uint64_t x32 = ((uint64_t)x30 & 0x3fffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x26);
-{ uint64_t x35 = ((uint64_t)x33 & 0x3fffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x26);
-{ uint64_t x38 = ((uint64_t)x36 & 0x3fffffffff);
-{ uint64_t x39 = (x26 + (0x13 * x37));
-{ uint64_t x40 = (x39 >> 0x27);
-{ uint64_t x41 = (x39 & 0x7fffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x26);
-{ uint64_t x44 = (x42 & 0x3fffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
+ { uint64_t x25 = (uint64_t) (x24 >> 0x27);
+ { uint64_t x26 = ((uint64_t)x24 & 0x7fffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x26);
+ { uint64_t x29 = ((uint64_t)x27 & 0x3fffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x26);
+ { uint64_t x32 = ((uint64_t)x30 & 0x3fffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x26);
+ { uint64_t x35 = ((uint64_t)x33 & 0x3fffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x26);
+ { uint64_t x38 = ((uint64_t)x36 & 0x3fffffffff);
+ { uint64_t x39 = (x26 + (0x13 * x37));
+ { uint64_t x40 = (x39 >> 0x27);
+ { uint64_t x41 = (x39 & 0x7fffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x26);
+ { uint64_t x44 = (x42 & 0x3fffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e191m19/fesquare.c b/src/Specific/solinas64_2e191m19/fesquare.c
index f212bd63c..2f53470de 100644
--- a/src/Specific/solinas64_2e191m19/fesquare.c
+++ b/src/Specific/solinas64_2e191m19/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
-{ uint64_t x14 = (uint64_t) (x13 >> 0x27);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x26);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3fffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x26);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x26);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x26);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffff);
-{ uint64_t x28 = (x15 + (0x13 * x26));
-{ uint64_t x29 = (x28 >> 0x27);
-{ uint64_t x30 = (x28 & 0x7fffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x26);
-{ uint64_t x33 = (x31 & 0x3fffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
+ { uint64_t x14 = (uint64_t) (x13 >> 0x27);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x26);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3fffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x26);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x26);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3fffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x26);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3fffffffff);
+ { uint64_t x28 = (x15 + (0x13 * x26));
+ { uint64_t x29 = (x28 >> 0x27);
+ { uint64_t x30 = (x28 & 0x7fffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x26);
+ { uint64_t x33 = (x31 & 0x3fffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e191m19/freeze.c b/src/Specific/solinas64_2e191m19/freeze.c
index f34ff816e..777bc6261 100644
--- a/src/Specific/solinas64_2e191m19/freeze.c
+++ b/src/Specific/solinas64_2e191m19/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 39 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffffed;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) { /* 5-limb freeze: runs a borrow chain subtracting a fixed 5-limb constant from in1, builds a mask from the final borrow, then runs a carry chain adding the masked constant back. NOTE(review): the `Op (Syntax....)` lines are not C syntax; they look like generator IR printed verbatim, so this body will not compile as shown. */
+ { const uint64_t x7 = in1[4]; /* inputs are loaded from in1[4] down to in1[0] */
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffffed); /* step 0: borrow-in 0x0, operand in1[0], constant 0x7fffffffed = 2^39 - 19; presumably x10 is the difference and x11 the borrow-out at a 39-bit limb width -- confirm against the generator */
+ { uint64_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x11, Return x4, 0x3fffffffff); /* step 1: previous borrow x11, operand in1[1], constant 0x3fffffffff = 2^38 - 1 */
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x14, Return x6, 0x3fffffffff); /* step 2: previous borrow x14, operand in1[2] */
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x8, 0x3fffffffff); /* step 3: previous borrow x17, operand in1[3] */
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x3fffffffff); /* step 4: previous borrow x20, operand in1[4]; if 39/38 are limb widths, the five constants together encode 2^191 - 19 */
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); /* cmovznz is defined outside this hunk; presumably it yields all-ones when the final borrow x23 is nonzero and 0x0 otherwise -- confirm */
+ { uint64_t x25 = (x24 & 0x7fffffffed); /* step-0 constant gated by the mask x24 */
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25); /* add-back step 0: carry-in 0x0, operands x10 and x25 */
+ { uint64_t x29 = (x24 & 0x3fffffffff); /* step-1 constant gated by the mask */
+ { uint64_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x13, Return x29); /* add-back step 1: carry-in x28 */
+ { uint64_t x33 = (x24 & 0x3fffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x16, Return x33); /* add-back step 2: carry-in x32 */
+ { uint64_t x37 = (x24 & 0x3fffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x19, Return x37); /* add-back step 3: carry-in x36 */
+ { uint64_t x41 = (x24 & 0x3fffffffff);
+ { uint64_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x22, Return x41); /* add-back step 4: the second result is bound to `_` and never used */
+ out[0] = x27; /* out[i] receives the first result of add-back step i */
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e192m2e64m1/femul.c b/src/Specific/solinas64_2e192m2e64m1/femul.c
index 1ba8252bf..cb6363ab6 100644
--- a/src/Specific/solinas64_2e192m2e64m1/femul.c
+++ b/src/Specific/solinas64_2e192m2e64m1/femul.c
@@ -1,57 +1,47 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = ((((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))) + (0x10000 * ((uint128_t)x8 * x14)));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (((uint128_t)x8 * x14) + (0x10000 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + ((((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)) + (0x10000 * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13))))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x30);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
-{ uint128_t x22 = (x16 >> 0x30);
-{ uint64_t x23 = ((uint64_t)x16 & 0xffffffffffff);
-{ uint128_t x24 = ((0x1000000000000 * x22) + x23);
-{ uint128_t x25 = (x24 >> 0x30);
-{ uint64_t x26 = ((uint64_t)x24 & 0xffffffffffff);
-{ uint128_t x27 = ((x20 + x18) + (0x10000 * x25));
-{ uint128_t x28 = (x27 >> 0x30);
-{ uint64_t x29 = ((uint64_t)x27 & 0xffffffffffff);
-{ uint128_t x30 = (x21 + x25);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x30);
-{ uint64_t x32 = ((uint64_t)x30 & 0xffffffffffff);
-{ uint128_t x33 = (x28 + x17);
-{ uint128_t x34 = (x33 >> 0x30);
-{ uint64_t x35 = ((uint64_t)x33 & 0xffffffffffff);
-{ uint128_t x36 = (x34 + x26);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x30);
-{ uint64_t x38 = ((uint64_t)x36 & 0xffffffffffff);
-{ uint128_t x39 = (((uint128_t)0x1000000000000 * x37) + x38);
-{ uint64_t x40 = (uint64_t) (x39 >> 0x30);
-{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffff);
-{ uint64_t x42 = ((x31 + x29) + (0x10000 * x40));
-{ uint64_t x43 = (x42 >> 0x30);
-{ uint64_t x44 = (x42 & 0xffffffffffff);
-{ uint64_t x45 = (x32 + x40);
-{ uint64_t x46 = (x45 >> 0x30);
-{ uint64_t x47 = (x45 & 0xffffffffffff);
-out[0] = x41;
-out[1] = x43 + x35;
-out[2] = x46 + x44;
-out[3] = x47;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* Multiplies the 4-limb values in1 and in2: builds four 128-bit accumulators from the limb products, then propagates carries at 48 bits per limb and writes four limbs to out. Products whose limb indices sum to 4 or more are folded back with weights 1 and 0x10000, which is consistent with reduction modulo 2^192 - 2^64 - 1 as the directory name suggests -- confirm against the generator spec. uint128_t is not declared in this hunk. */
+ { const uint64_t x8 = in1[3]; /* in1 limbs: x5 = in1[0], x7 = in1[1], x9 = in1[2], x8 = in1[3] */
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3]; /* in2 limbs: x11 = in2[0], x13 = in2[1], x15 = in2[2], x14 = in2[3] */
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = ((((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))) + (0x10000 * ((uint128_t)x8 * x14))); /* limb 3: index-sum-3 products plus 0x10000 times the (3,3) product */
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (((uint128_t)x8 * x14) + (0x10000 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))))); /* limb 2: index-sum-2 products, plus the (3,3) product, plus 0x10000 times the index-sum-5 products */
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + ((((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)) + (0x10000 * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))))); /* limb 1: index-sum-1 products, plus index-sum-5 products, plus 0x10000 times the index-sum-4 products */
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))); /* limb 0: the (0,0) product plus the index-sum-4 products */
+ { uint64_t x20 = (uint64_t) (x19 >> 0x30); /* carry out of limb 0 (bits 48 and up) */
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff); /* low 48 bits of limb 0 */
+ { uint128_t x22 = (x16 >> 0x30); /* x22/x23 split x16 at bit 48 */
+ { uint64_t x23 = ((uint64_t)x16 & 0xffffffffffff);
+ { uint128_t x24 = ((0x1000000000000 * x22) + x23); /* reassembles the two halves, so x24 has the same value as x16 */
+ { uint128_t x25 = (x24 >> 0x30); /* carry out of limb 3 */
+ { uint64_t x26 = ((uint64_t)x24 & 0xffffffffffff); /* low 48 bits of limb 3 */
+ { uint128_t x27 = ((x20 + x18) + (0x10000 * x25)); /* limb 1 plus the limb-0 carry plus 0x10000 times the limb-3 carry (wrap-around) */
+ { uint128_t x28 = (x27 >> 0x30);
+ { uint64_t x29 = ((uint64_t)x27 & 0xffffffffffff);
+ { uint128_t x30 = (x21 + x25); /* limb 0 also absorbs the limb-3 carry with weight 1 */
+ { uint64_t x31 = (uint64_t) (x30 >> 0x30);
+ { uint64_t x32 = ((uint64_t)x30 & 0xffffffffffff);
+ { uint128_t x33 = (x28 + x17); /* limb 2 plus the limb-1 carry */
+ { uint128_t x34 = (x33 >> 0x30);
+ { uint64_t x35 = ((uint64_t)x33 & 0xffffffffffff);
+ { uint128_t x36 = (x34 + x26); /* limb 3 low bits plus the limb-2 carry */
+ { uint64_t x37 = (uint64_t) (x36 >> 0x30);
+ { uint64_t x38 = ((uint64_t)x36 & 0xffffffffffff);
+ { uint128_t x39 = (((uint128_t)0x1000000000000 * x37) + x38); /* reassembled again; same value as x36 */
+ { uint64_t x40 = (uint64_t) (x39 >> 0x30); /* second carry out of limb 3 */
+ { uint64_t x41 = ((uint64_t)x39 & 0xffffffffffff); /* final limb 3 */
+ { uint64_t x42 = ((x31 + x29) + (0x10000 * x40)); /* second pass on limb 1: limb-0 carry x31 plus 0x10000 times the second limb-3 carry */
+ { uint64_t x43 = (x42 >> 0x30);
+ { uint64_t x44 = (x42 & 0xffffffffffff);
+ { uint64_t x45 = (x32 + x40); /* second pass on limb 0 */
+ { uint64_t x46 = (x45 >> 0x30);
+ { uint64_t x47 = (x45 & 0xffffffffffff);
+ out[0] = x47; /* out[0] and out[3] are masked to 48 bits */
+ out[1] = (x46 + x44); /* a last carry is added without re-masking, so out[1] may exceed 48 bits */
+ out[2] = (x43 + x35); /* likewise not re-masked */
+ out[3] = x41;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e192m2e64m1/fesquare.c b/src/Specific/solinas64_2e192m2e64m1/fesquare.c
index 66b85d83e..5988be53a 100644
--- a/src/Specific/solinas64_2e192m2e64m1/fesquare.c
+++ b/src/Specific/solinas64_2e192m2e64m1/fesquare.c
@@ -1,57 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = ((((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))) + (0x10000 * ((uint128_t)x5 * x5)));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x5 * x5) + (0x10000 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)) + (0x10000 * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4))))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x30);
-{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
-{ uint128_t x13 = (x7 >> 0x30);
-{ uint64_t x14 = ((uint64_t)x7 & 0xffffffffffff);
-{ uint128_t x15 = ((0x1000000000000 * x13) + x14);
-{ uint128_t x16 = (x15 >> 0x30);
-{ uint64_t x17 = ((uint64_t)x15 & 0xffffffffffff);
-{ uint128_t x18 = ((x11 + x9) + (0x10000 * x16));
-{ uint128_t x19 = (x18 >> 0x30);
-{ uint64_t x20 = ((uint64_t)x18 & 0xffffffffffff);
-{ uint128_t x21 = (x12 + x16);
-{ uint64_t x22 = (uint64_t) (x21 >> 0x30);
-{ uint64_t x23 = ((uint64_t)x21 & 0xffffffffffff);
-{ uint128_t x24 = (x19 + x8);
-{ uint128_t x25 = (x24 >> 0x30);
-{ uint64_t x26 = ((uint64_t)x24 & 0xffffffffffff);
-{ uint128_t x27 = (x25 + x17);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x30);
-{ uint64_t x29 = ((uint64_t)x27 & 0xffffffffffff);
-{ uint128_t x30 = (((uint128_t)0x1000000000000 * x28) + x29);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x30);
-{ uint64_t x32 = ((uint64_t)x30 & 0xffffffffffff);
-{ uint64_t x33 = ((x22 + x20) + (0x10000 * x31));
-{ uint64_t x34 = (x33 >> 0x30);
-{ uint64_t x35 = (x33 & 0xffffffffffff);
-{ uint64_t x36 = (x23 + x31);
-{ uint64_t x37 = (x36 >> 0x30);
-{ uint64_t x38 = (x36 & 0xffffffffffff);
-out[0] = x32;
-out[1] = x34 + x26;
-out[2] = x37 + x35;
-out[3] = x38;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) { /* Squares the 4-limb value in1: builds four 128-bit accumulators from products of in1 limbs with each other, then propagates carries at 48 bits per limb and writes four limbs to out. Products whose limb indices sum to 4 or more are folded back with weights 1 and 0x10000, which is consistent with reduction modulo 2^192 - 2^64 - 1 as the directory name suggests -- confirm against the generator spec. uint128_t is not declared in this hunk. */
+ { const uint64_t x5 = in1[3]; /* limbs: x2 = in1[0], x4 = in1[1], x6 = in1[2], x5 = in1[3] */
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = ((((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))) + (0x10000 * ((uint128_t)x5 * x5))); /* limb 3: index-sum-3 products plus 0x10000 times the (3,3) product; symmetric products are written out twice rather than doubled */
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x5 * x5) + (0x10000 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))))); /* limb 2: index-sum-2 products, plus the (3,3) product, plus 0x10000 times the index-sum-5 products */
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)) + (0x10000 * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))))); /* limb 1: index-sum-1 products, plus index-sum-5 products, plus 0x10000 times the index-sum-4 products */
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))); /* limb 0: the (0,0) product plus the index-sum-4 products */
+ { uint64_t x11 = (uint64_t) (x10 >> 0x30); /* carry out of limb 0 (bits 48 and up) */
+ { uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff); /* low 48 bits of limb 0 */
+ { uint128_t x13 = (x7 >> 0x30); /* x13/x14 split x7 at bit 48 */
+ { uint64_t x14 = ((uint64_t)x7 & 0xffffffffffff);
+ { uint128_t x15 = ((0x1000000000000 * x13) + x14); /* reassembles the two halves, so x15 has the same value as x7 */
+ { uint128_t x16 = (x15 >> 0x30); /* carry out of limb 3 */
+ { uint64_t x17 = ((uint64_t)x15 & 0xffffffffffff); /* low 48 bits of limb 3 */
+ { uint128_t x18 = ((x11 + x9) + (0x10000 * x16)); /* limb 1 plus the limb-0 carry plus 0x10000 times the limb-3 carry (wrap-around) */
+ { uint128_t x19 = (x18 >> 0x30);
+ { uint64_t x20 = ((uint64_t)x18 & 0xffffffffffff);
+ { uint128_t x21 = (x12 + x16); /* limb 0 also absorbs the limb-3 carry with weight 1 */
+ { uint64_t x22 = (uint64_t) (x21 >> 0x30);
+ { uint64_t x23 = ((uint64_t)x21 & 0xffffffffffff);
+ { uint128_t x24 = (x19 + x8); /* limb 2 plus the limb-1 carry */
+ { uint128_t x25 = (x24 >> 0x30);
+ { uint64_t x26 = ((uint64_t)x24 & 0xffffffffffff);
+ { uint128_t x27 = (x25 + x17); /* limb 3 low bits plus the limb-2 carry */
+ { uint64_t x28 = (uint64_t) (x27 >> 0x30);
+ { uint64_t x29 = ((uint64_t)x27 & 0xffffffffffff);
+ { uint128_t x30 = (((uint128_t)0x1000000000000 * x28) + x29); /* reassembled again; same value as x27 */
+ { uint64_t x31 = (uint64_t) (x30 >> 0x30); /* second carry out of limb 3 */
+ { uint64_t x32 = ((uint64_t)x30 & 0xffffffffffff); /* final limb 3 */
+ { uint64_t x33 = ((x22 + x20) + (0x10000 * x31)); /* second pass on limb 1: limb-0 carry x22 plus 0x10000 times the second limb-3 carry */
+ { uint64_t x34 = (x33 >> 0x30);
+ { uint64_t x35 = (x33 & 0xffffffffffff);
+ { uint64_t x36 = (x23 + x31); /* second pass on limb 0 */
+ { uint64_t x37 = (x36 >> 0x30);
+ { uint64_t x38 = (x36 & 0xffffffffffff);
+ out[0] = x38; /* out[0] and out[3] are masked to 48 bits */
+ out[1] = (x37 + x35); /* a last carry is added without re-masking, so out[1] may exceed 48 bits */
+ out[2] = (x34 + x26); /* likewise not re-masked */
+ out[3] = x32;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e192m2e64m1/freeze.c b/src/Specific/solinas64_2e192m2e64m1/freeze.c
index 8e3be6645..13fa2920c 100644
--- a/src/Specific/solinas64_2e192m2e64m1/freeze.c
+++ b/src/Specific/solinas64_2e192m2e64m1/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffff;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) { /* 4-limb freeze: runs a borrow chain subtracting a fixed 4-limb constant from in1, builds a mask from the final borrow, then runs a carry chain adding the masked constant back. NOTE(review): the `Op (Syntax....)` lines are not C syntax; they look like generator IR printed verbatim, so this body will not compile as shown. */
+ { const uint64_t x5 = in1[3]; /* inputs are loaded from in1[3] down to in1[0] */
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffff); /* step 0: borrow-in 0x0, operand in1[0], constant 0xffffffffffff = 2^48 - 1; presumably x8 is the difference and x9 the borrow-out at a 48-bit limb width -- confirm against the generator */
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0xfffffffeffff); /* step 1: previous borrow x9, operand in1[1], constant 0xfffffffeffff = 2^48 - 2^16 - 1 */
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0xffffffffffff); /* step 2: previous borrow x12, operand in1[2] */
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0xffffffffffff); /* step 3: previous borrow x15, operand in1[3]; if 48 is the limb width, the four constants together encode 2^192 - 2^64 - 1 */
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); /* cmovznz is defined outside this hunk; presumably it yields all-ones when the final borrow x18 is nonzero and 0x0 otherwise -- confirm */
+ { uint64_t x20 = (x19 & 0xffffffffffff); /* step-0 constant gated by the mask x19 */
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20); /* add-back step 0: carry-in 0x0, operands x8 and x20 */
+ { uint64_t x24 = (x19 & 0xfffffffeffff); /* step-1 constant gated by the mask */
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24); /* add-back step 1: carry-in x23 */
+ { uint64_t x28 = (x19 & 0xffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28); /* add-back step 2: carry-in x27 */
+ { uint64_t x32 = (x19 & 0xffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32); /* add-back step 3: the second result is bound to `_` and never used */
+ out[0] = x22; /* out[i] receives the first result of add-back step i */
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e194m33/femul.c b/src/Specific/solinas64_2e194m33/femul.c
index 0a72aa067..c9dfe04d0 100644
--- a/src/Specific/solinas64_2e194m33/femul.c
+++ b/src/Specific/solinas64_2e194m33/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x21 * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x21 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x21 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x31);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x30);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x31);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x30);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffff);
-{ uint64_t x31 = (x21 + (0x21 * x29));
-{ uint64_t x32 = (x31 >> 0x31);
-{ uint64_t x33 = (x31 & 0x1ffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x30);
-{ uint64_t x36 = (x34 & 0xffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* Multiplies the 4-limb values in1 and in2: builds four 128-bit accumulators from the limb products, then propagates carries with alternating 49/48-bit limbs and writes four limbs to out. High products are folded back with factor 0x21 (33), consistent with reduction modulo 2^194 - 33 as the directory name suggests; the 0x2 factors presumably compensate for the mixed limb widths -- confirm against the generator spec. uint128_t is not declared in this hunk. */
+ { const uint64_t x8 = in1[3]; /* in1 limbs: x5 = in1[0], x7 = in1[1], x9 = in1[2], x8 = in1[3] */
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3]; /* in2 limbs: x11 = in2[0], x13 = in2[1], x15 = in2[2], x14 = in2[3] */
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); /* limb 3: index-sum-3 products */
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x21 * (0x2 * ((uint128_t)x8 * x14)))); /* limb 2: index-sum-2 products with (1,1) doubled, plus 0x21 times twice the (3,3) product */
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x21 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); /* limb 1: index-sum-1 products plus 0x21 times the index-sum-5 products */
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x21 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13)))))); /* limb 0: the (0,0) product plus 0x21 times the index-sum-4 products, with (1,3) and (3,1) doubled */
+ { uint64_t x20 = (uint64_t) (x19 >> 0x31); /* carry out of limb 0 (49-bit limb) */
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff); /* low 49 bits of limb 0 */
+ { uint128_t x22 = (x20 + x18); /* limb 1 plus the limb-0 carry */
+ { uint64_t x23 = (uint64_t) (x22 >> 0x30); /* carry out of limb 1 (48-bit limb) */
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffff);
+ { uint128_t x25 = (x23 + x17); /* limb 2 plus the limb-1 carry */
+ { uint64_t x26 = (uint64_t) (x25 >> 0x31); /* carry out of limb 2 (49-bit limb) */
+ { uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffff);
+ { uint128_t x28 = (x26 + x16); /* limb 3 plus the limb-2 carry */
+ { uint64_t x29 = (uint64_t) (x28 >> 0x30); /* carry out of the top limb (48-bit limb) */
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffff); /* final limb 3 */
+ { uint64_t x31 = (x21 + (0x21 * x29)); /* the top carry wraps around into limb 0 scaled by 0x21 */
+ { uint64_t x32 = (x31 >> 0x31);
+ { uint64_t x33 = (x31 & 0x1ffffffffffff); /* final limb 0 */
+ { uint64_t x34 = (x32 + x24); /* second pass on limb 1 */
+ { uint64_t x35 = (x34 >> 0x30);
+ { uint64_t x36 = (x34 & 0xffffffffffff); /* final limb 1 */
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27); /* a last carry is added without re-masking, so out[2] may exceed 49 bits */
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e194m33/fesquare.c b/src/Specific/solinas64_2e194m33/fesquare.c
index b2d94772d..877c757a4 100644
--- a/src/Specific/solinas64_2e194m33/fesquare.c
+++ b/src/Specific/solinas64_2e194m33/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x21 * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x21 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x21 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x31);
-{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x30);
-{ uint64_t x15 = ((uint64_t)x13 & 0xffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x31);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x30);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
-{ uint64_t x22 = (x12 + (0x21 * x20));
-{ uint64_t x23 = (x22 >> 0x31);
-{ uint64_t x24 = (x22 & 0x1ffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x30);
-{ uint64_t x27 = (x25 & 0xffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) { /* Squares the 4-limb value in1: builds four 128-bit accumulators from products of in1 limbs with each other, then propagates carries with alternating 49/48-bit limbs and writes four limbs to out. High products are folded back with factor 0x21 (33), consistent with reduction modulo 2^194 - 33 as the directory name suggests; the 0x2 factors presumably compensate for the mixed limb widths -- confirm against the generator spec. uint128_t is not declared in this hunk. */
+ { const uint64_t x5 = in1[3]; /* limbs: x2 = in1[0], x4 = in1[1], x6 = in1[2], x5 = in1[3] */
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); /* limb 3: index-sum-3 products; symmetric products are written out twice rather than doubled */
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x21 * (0x2 * ((uint128_t)x5 * x5)))); /* limb 2: index-sum-2 products with (1,1) doubled, plus 0x21 times twice the (3,3) product */
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x21 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); /* limb 1: index-sum-1 products plus 0x21 times the index-sum-5 products */
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x21 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4)))))); /* limb 0: the (0,0) product plus 0x21 times the index-sum-4 products, with (1,3) and (3,1) doubled */
+ { uint64_t x11 = (uint64_t) (x10 >> 0x31); /* carry out of limb 0 (49-bit limb) */
+ { uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffff); /* low 49 bits of limb 0 */
+ { uint128_t x13 = (x11 + x9); /* limb 1 plus the limb-0 carry */
+ { uint64_t x14 = (uint64_t) (x13 >> 0x30); /* carry out of limb 1 (48-bit limb) */
+ { uint64_t x15 = ((uint64_t)x13 & 0xffffffffffff);
+ { uint128_t x16 = (x14 + x8); /* limb 2 plus the limb-1 carry */
+ { uint64_t x17 = (uint64_t) (x16 >> 0x31); /* carry out of limb 2 (49-bit limb) */
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffff);
+ { uint128_t x19 = (x17 + x7); /* limb 3 plus the limb-2 carry */
+ { uint64_t x20 = (uint64_t) (x19 >> 0x30); /* carry out of the top limb (48-bit limb) */
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff); /* final limb 3 */
+ { uint64_t x22 = (x12 + (0x21 * x20)); /* the top carry wraps around into limb 0 scaled by 0x21 */
+ { uint64_t x23 = (x22 >> 0x31);
+ { uint64_t x24 = (x22 & 0x1ffffffffffff); /* final limb 0 */
+ { uint64_t x25 = (x23 + x15); /* second pass on limb 1 */
+ { uint64_t x26 = (x25 >> 0x30);
+ { uint64_t x27 = (x25 & 0xffffffffffff); /* final limb 1 */
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18); /* a last carry is added without re-masking, so out[2] may exceed 49 bits */
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e194m33/freeze.c b/src/Specific/solinas64_2e194m33/freeze.c
index f11db2654..6fb0fa4d3 100644
--- a/src/Specific/solinas64_2e194m33/freeze.c
+++ b/src/Specific/solinas64_2e194m33/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 49 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffffffffdf;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) { /* 4-limb freeze: runs a borrow chain subtracting a fixed 4-limb constant from in1, builds a mask from the final borrow, then runs a carry chain adding the masked constant back. NOTE(review): the `Op (Syntax....)` lines are not C syntax; they look like generator IR printed verbatim, so this body will not compile as shown. */
+ { const uint64_t x5 = in1[3]; /* inputs are loaded from in1[3] down to in1[0] */
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffffffdf); /* step 0: borrow-in 0x0, operand in1[0], constant 0x1ffffffffffdf = 2^49 - 33; presumably x8 is the difference and x9 the borrow-out at a 49-bit limb width -- confirm against the generator */
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0xffffffffffff); /* step 1: previous borrow x9, operand in1[1], constant 0xffffffffffff = 2^48 - 1 */
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x1ffffffffffff); /* step 2: previous borrow x12, operand in1[2], constant 0x1ffffffffffff = 2^49 - 1 */
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0xffffffffffff); /* step 3: previous borrow x15, operand in1[3]; if 49/48 are limb widths, the four constants together encode 2^194 - 33 */
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); /* cmovznz is defined outside this hunk; presumably it yields all-ones when the final borrow x18 is nonzero and 0x0 otherwise -- confirm */
+ { uint64_t x20 = (x19 & 0x1ffffffffffdf); /* step-0 constant gated by the mask x19 */
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20); /* add-back step 0: carry-in 0x0, operands x8 and x20 */
+ { uint64_t x24 = (x19 & 0xffffffffffff); /* step-1 constant gated by the mask */
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24); /* add-back step 1: carry-in x23 */
+ { uint64_t x28 = (x19 & 0x1ffffffffffff); /* step-2 constant gated by the mask */
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28); /* add-back step 2: carry-in x27 */
+ { uint64_t x32 = (x19 & 0xffffffffffff); /* step-3 constant gated by the mask */
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32); /* add-back step 3: the second result is bound to `_` and never used */
+ out[0] = x22; /* out[i] receives the first result of add-back step i */
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e196m15/femul.c b/src/Specific/solinas64_2e196m15/femul.c
index 311023c9f..3f661dd84 100644
--- a/src/Specific/solinas64_2e196m15/femul.c
+++ b/src/Specific/solinas64_2e196m15/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0xf * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xf * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xf * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x31);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x31);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x31);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x31);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffff);
-{ uint64_t x31 = (x21 + (0xf * x29));
-{ uint64_t x32 = (x31 >> 0x31);
-{ uint64_t x33 = (x31 & 0x1ffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x31);
-{ uint64_t x36 = (x34 & 0x1ffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) { /* Multiplies the 4-limb values in1 and in2: builds four 128-bit accumulators from the limb products, then propagates carries at 49 bits per limb and writes four limbs to out. High products are folded back with factor 0xf (15), consistent with reduction modulo 2^196 - 15 as the directory name suggests -- confirm against the generator spec. uint128_t is not declared in this hunk. */
+ { const uint64_t x8 = in1[3]; /* in1 limbs: x5 = in1[0], x7 = in1[1], x9 = in1[2], x8 = in1[3] */
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3]; /* in2 limbs: x11 = in2[0], x13 = in2[1], x15 = in2[2], x14 = in2[3] */
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); /* limb 3: index-sum-3 products */
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0xf * ((uint128_t)x8 * x14))); /* limb 2: index-sum-2 products plus 0xf times the (3,3) product */
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xf * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); /* limb 1: index-sum-1 products plus 0xf times the index-sum-5 products */
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0xf * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13))))); /* limb 0: the (0,0) product plus 0xf times the index-sum-4 products */
+ { uint64_t x20 = (uint64_t) (x19 >> 0x31); /* carry out of limb 0 (bits 49 and up) */
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff); /* low 49 bits of limb 0 */
+ { uint128_t x22 = (x20 + x18); /* limb 1 plus the limb-0 carry */
+ { uint64_t x23 = (uint64_t) (x22 >> 0x31);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffff);
+ { uint128_t x25 = (x23 + x17); /* limb 2 plus the limb-1 carry */
+ { uint64_t x26 = (uint64_t) (x25 >> 0x31);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffff);
+ { uint128_t x28 = (x26 + x16); /* limb 3 plus the limb-2 carry */
+ { uint64_t x29 = (uint64_t) (x28 >> 0x31); /* carry out of the top limb */
+ { uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffff); /* final limb 3 */
+ { uint64_t x31 = (x21 + (0xf * x29)); /* the top carry wraps around into limb 0 scaled by 0xf */
+ { uint64_t x32 = (x31 >> 0x31);
+ { uint64_t x33 = (x31 & 0x1ffffffffffff); /* final limb 0 */
+ { uint64_t x34 = (x32 + x24); /* second pass on limb 1 */
+ { uint64_t x35 = (x34 >> 0x31);
+ { uint64_t x36 = (x34 & 0x1ffffffffffff); /* final limb 1 */
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27); /* a last carry is added without re-masking, so out[2] may exceed 49 bits */
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e196m15/fesquare.c b/src/Specific/solinas64_2e196m15/fesquare.c
index f71b0d879..2679040a4 100644
--- a/src/Specific/solinas64_2e196m15/fesquare.c
+++ b/src/Specific/solinas64_2e196m15/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0xf * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xf * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x31);
-{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x31);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x31);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x31);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
-{ uint64_t x22 = (x12 + (0xf * x20));
-{ uint64_t x23 = (x22 >> 0x31);
-{ uint64_t x24 = (x22 & 0x1ffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x31);
-{ uint64_t x27 = (x25 & 0x1ffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0xf * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0xf * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x31);
+ { uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x31);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x31);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x31);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
+ { uint64_t x22 = (x12 + (0xf * x20));
+ { uint64_t x23 = (x22 >> 0x31);
+ { uint64_t x24 = (x22 & 0x1ffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x31);
+ { uint64_t x27 = (x25 & 0x1ffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e196m15/freeze.c b/src/Specific/solinas64_2e196m15/freeze.c
index c9b2e9e7d..f5ab2b763 100644
--- a/src/Specific/solinas64_2e196m15/freeze.c
+++ b/src/Specific/solinas64_2e196m15/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 49 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffff1;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffff1);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x1ffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x1ffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x1ffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x1fffffffffff1);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x1ffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x1ffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x1ffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e198m17/femul.c b/src/Specific/solinas64_2e198m17/femul.c
index aae9230df..6fc4add36 100644
--- a/src/Specific/solinas64_2e198m17/femul.c
+++ b/src/Specific/solinas64_2e198m17/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x11 * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x11 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x11 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x32);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x31);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x32);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x31);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffff);
-{ uint64_t x31 = (x21 + (0x11 * x29));
-{ uint64_t x32 = (x31 >> 0x32);
-{ uint64_t x33 = (x31 & 0x3ffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x31);
-{ uint64_t x36 = (x34 & 0x1ffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x11 * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x11 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x11 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x32);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x31);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x32);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x31);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffff);
+ { uint64_t x31 = (x21 + (0x11 * x29));
+ { uint64_t x32 = (x31 >> 0x32);
+ { uint64_t x33 = (x31 & 0x3ffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x31);
+ { uint64_t x36 = (x34 & 0x1ffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e198m17/fesquare.c b/src/Specific/solinas64_2e198m17/fesquare.c
index 626385e88..c2571c13f 100644
--- a/src/Specific/solinas64_2e198m17/fesquare.c
+++ b/src/Specific/solinas64_2e198m17/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x32);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x31);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x32);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x31);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
-{ uint64_t x22 = (x12 + (0x11 * x20));
-{ uint64_t x23 = (x22 >> 0x32);
-{ uint64_t x24 = (x22 & 0x3ffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x31);
-{ uint64_t x27 = (x25 & 0x1ffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x32);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x31);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x32);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x31);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
+ { uint64_t x22 = (x12 + (0x11 * x20));
+ { uint64_t x23 = (x22 >> 0x32);
+ { uint64_t x24 = (x22 & 0x3ffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x31);
+ { uint64_t x27 = (x25 & 0x1ffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e198m17/freeze.c b/src/Specific/solinas64_2e198m17/freeze.c
index 6f03648b3..49699e061 100644
--- a/src/Specific/solinas64_2e198m17/freeze.c
+++ b/src/Specific/solinas64_2e198m17/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffffffef;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffffffef);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x1ffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x3ffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x1ffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x3ffffffffffef);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x1ffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x3ffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x1ffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 49 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e205m45x2e198m1/freeze.c b/src/Specific/solinas64_2e205m45x2e198m1/freeze.c
index 0fb0d73ea..92ad7407b 100644
--- a/src/Specific/solinas64_2e205m45x2e198m1/freeze.c
+++ b/src/Specific/solinas64_2e205m45x2e198m1/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffff;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffff);
+ { uint64_t x11; uint8_t x12 = _subborrow_u51(x9, x4, 0x7ffffffffffff, &x11);
+ { uint64_t x14; uint8_t x15 = _subborrow_u51(x12, x6, 0x7ffffffffffff, &x14);
+ { uint64_t x17; uint8_t x18 = _subborrow_u51(x15, x5, 0x52fffffffffff, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7ffffffffffff);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u51(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0x7ffffffffffff);
+ { uint64_t x30; uint8_t x31 = _addcarryx_u51(x27, x14, x28, &x30);
+ { uint64_t x32 = (x19 & 0x52fffffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u51(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e206m5/femul.c b/src/Specific/solinas64_2e206m5/femul.c
index 8642d8816..cc09e2ec7 100644
--- a/src/Specific/solinas64_2e206m5/femul.c
+++ b/src/Specific/solinas64_2e206m5/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x5 * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x5 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x5 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x34);
-{ uint64_t x21 = ((uint64_t)x19 & 0xfffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x34);
-{ uint64_t x27 = ((uint64_t)x25 & 0xfffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x33);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
-{ uint64_t x31 = (x21 + (0x5 * x29));
-{ uint64_t x32 = (x31 >> 0x34);
-{ uint64_t x33 = (x31 & 0xfffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x33);
-{ uint64_t x36 = (x34 & 0x7ffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x5 * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x5 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x5 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x34);
+ { uint64_t x21 = ((uint64_t)x19 & 0xfffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x33);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x34);
+ { uint64_t x27 = ((uint64_t)x25 & 0xfffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x33);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
+ { uint64_t x31 = (x21 + (0x5 * x29));
+ { uint64_t x32 = (x31 >> 0x34);
+ { uint64_t x33 = (x31 & 0xfffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x33);
+ { uint64_t x36 = (x34 & 0x7ffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e206m5/fesquare.c b/src/Specific/solinas64_2e206m5/fesquare.c
index c3a9b9c6f..df4b48fcf 100644
--- a/src/Specific/solinas64_2e206m5/fesquare.c
+++ b/src/Specific/solinas64_2e206m5/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x5 * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x34);
-{ uint64_t x12 = ((uint64_t)x10 & 0xfffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x33);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x34);
-{ uint64_t x18 = ((uint64_t)x16 & 0xfffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
-{ uint64_t x22 = (x12 + (0x5 * x20));
-{ uint64_t x23 = (x22 >> 0x34);
-{ uint64_t x24 = (x22 & 0xfffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x33);
-{ uint64_t x27 = (x25 & 0x7ffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x5 * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x34);
+ { uint64_t x12 = ((uint64_t)x10 & 0xfffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x33);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x34);
+ { uint64_t x18 = ((uint64_t)x16 & 0xfffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x33);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+ { uint64_t x22 = (x12 + (0x5 * x20));
+ { uint64_t x23 = (x22 >> 0x34);
+ { uint64_t x24 = (x22 & 0xfffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x33);
+ { uint64_t x27 = (x25 & 0x7ffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e206m5/freeze.c b/src/Specific/solinas64_2e206m5/freeze.c
index 3b2652885..b60e0ad31 100644
--- a/src/Specific/solinas64_2e206m5/freeze.c
+++ b/src/Specific/solinas64_2e206m5/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffffb;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffffb);
+ { uint64_t x11; uint8_t x12 = _subborrow_u51(x9, x4, 0x7ffffffffffff, &x11);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0xfffffffffffff);
+ { uint64_t x17; uint8_t x18 = _subborrow_u51(x15, x5, 0x7ffffffffffff, &x17);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffffb);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7ffffffffffff);
+ { uint64_t x26; uint8_t x27 = _addcarryx_u51(x23, x11, x24, &x26);
+ { uint64_t x28 = (x19 & 0xfffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7ffffffffffff);
+ { uint64_t x34; uint8_t _ = _addcarryx_u51(x31, x17, x32, &x34);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e212m29/femul.c b/src/Specific/solinas64_2e212m29/femul.c
index 24bb4cc1f..edd1e152c 100644
--- a/src/Specific/solinas64_2e212m29/femul.c
+++ b/src/Specific/solinas64_2e212m29/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x1d * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1d * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x1d * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x35);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x35);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x35);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x35);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff);
-{ uint64_t x31 = (x21 + (0x1d * x29));
-{ uint64_t x32 = (x31 >> 0x35);
-{ uint64_t x33 = (x31 & 0x1fffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x35);
-{ uint64_t x36 = (x34 & 0x1fffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x1d * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1d * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x1d * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x35);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x35);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x35);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x35);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff);
+ { uint64_t x31 = (x21 + (0x1d * x29));
+ { uint64_t x32 = (x31 >> 0x35);
+ { uint64_t x33 = (x31 & 0x1fffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x35);
+ { uint64_t x36 = (x34 & 0x1fffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e212m29/fesquare.c b/src/Specific/solinas64_2e212m29/fesquare.c
index 7c4f36e46..faa9c384f 100644
--- a/src/Specific/solinas64_2e212m29/fesquare.c
+++ b/src/Specific/solinas64_2e212m29/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1d * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1d * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x1d * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x35);
-{ uint64_t x12 = ((uint64_t)x10 & 0x1fffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x35);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1fffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x35);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x35);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
-{ uint64_t x22 = (x12 + (0x1d * x20));
-{ uint64_t x23 = (x22 >> 0x35);
-{ uint64_t x24 = (x22 & 0x1fffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x35);
-{ uint64_t x27 = (x25 & 0x1fffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1d * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1d * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x1d * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x35);
+ { uint64_t x12 = ((uint64_t)x10 & 0x1fffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x35);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1fffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x35);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x35);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+ { uint64_t x22 = (x12 + (0x1d * x20));
+ { uint64_t x23 = (x22 >> 0x35);
+ { uint64_t x24 = (x22 & 0x1fffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x35);
+ { uint64_t x27 = (x25 & 0x1fffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e212m29/freeze.c b/src/Specific/solinas64_2e212m29/freeze.c
index c5b0cf10e..66574dbe1 100644
--- a/src/Specific/solinas64_2e212m29/freeze.c
+++ b/src/Specific/solinas64_2e212m29/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 53 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffffe3;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffffe3);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x1fffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x1fffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x1fffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x1fffffffffffe3);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x1fffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x1fffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x1fffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e213m3/femul.c b/src/Specific/solinas64_2e213m3/femul.c
index 5ce881fe6..9b8555ac6 100644
--- a/src/Specific/solinas64_2e213m3/femul.c
+++ b/src/Specific/solinas64_2e213m3/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x3 * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x3 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x3 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x36);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x35);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x35);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x35);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff);
-{ uint64_t x31 = (x21 + (0x3 * x29));
-{ uint64_t x32 = (x31 >> 0x36);
-{ uint64_t x33 = (x31 & 0x3fffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x35);
-{ uint64_t x36 = (x34 & 0x1fffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x3 * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x3 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x3 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x36);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3fffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x35);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x35);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x35);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff);
+ { uint64_t x31 = (x21 + (0x3 * x29));
+ { uint64_t x32 = (x31 >> 0x36);
+ { uint64_t x33 = (x31 & 0x3fffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x35);
+ { uint64_t x36 = (x34 & 0x1fffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e213m3/fesquare.c b/src/Specific/solinas64_2e213m3/fesquare.c
index 1ba8671f7..f7dd12ecc 100644
--- a/src/Specific/solinas64_2e213m3/fesquare.c
+++ b/src/Specific/solinas64_2e213m3/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x36);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3fffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x35);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1fffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x35);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x35);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
-{ uint64_t x22 = (x12 + (0x3 * x20));
-{ uint64_t x23 = (x22 >> 0x36);
-{ uint64_t x24 = (x22 & 0x3fffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x35);
-{ uint64_t x27 = (x25 & 0x1fffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x36);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3fffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x35);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1fffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x35);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x35);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+ { uint64_t x22 = (x12 + (0x3 * x20));
+ { uint64_t x23 = (x22 >> 0x36);
+ { uint64_t x24 = (x22 & 0x3fffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x35);
+ { uint64_t x27 = (x25 & 0x1fffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e213m3/freeze.c b/src/Specific/solinas64_2e213m3/freeze.c
index 65f4d878d..0ce2f392a 100644
--- a/src/Specific/solinas64_2e213m3/freeze.c
+++ b/src/Specific/solinas64_2e213m3/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffffffffd;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffffffffd);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x1fffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x1fffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x1fffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x3ffffffffffffd);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x1fffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x1fffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x1fffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e216m2e108m1/femul.c b/src/Specific/solinas64_2e216m2e108m1/femul.c
index 59d5d9612..94f4a962f 100644
--- a/src/Specific/solinas64_2e216m2e108m1/femul.c
+++ b/src/Specific/solinas64_2e216m2e108m1/femul.c
@@ -1,59 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)(x7 + x8) * (x13 + x14)) - ((uint128_t)x7 * x13));
-{ uint128_t x17 = ((((uint128_t)(x5 + x9) * (x13 + x14)) + ((uint128_t)(x7 + x8) * (x11 + x15))) - (((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)));
-{ uint128_t x18 = (((uint128_t)(x5 + x9) * (x11 + x15)) - ((uint128_t)x5 * x11));
-{ uint128_t x19 = (((((uint128_t)x7 * x13) + ((uint128_t)x8 * x14)) + x18) + x16);
-{ uint128_t x20 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)));
-{ uint128_t x21 = ((((uint128_t)x5 * x11) + ((uint128_t)x9 * x15)) + x16);
-{ uint64_t x22 = (uint64_t) (x20 >> 0x36);
-{ uint64_t x23 = ((uint64_t)x20 & 0x3fffffffffffff);
-{ uint64_t x24 = (uint64_t) (x17 >> 0x36);
-{ uint64_t x25 = ((uint64_t)x17 & 0x3fffffffffffff);
-{ uint128_t x26 = (((uint128_t)0x40000000000000 * x24) + x25);
-{ uint64_t x27 = (uint64_t) (x26 >> 0x36);
-{ uint64_t x28 = ((uint64_t)x26 & 0x3fffffffffffff);
-{ uint128_t x29 = ((x22 + x19) + x27);
-{ uint64_t x30 = (uint64_t) (x29 >> 0x36);
-{ uint64_t x31 = ((uint64_t)x29 & 0x3fffffffffffff);
-{ uint128_t x32 = (x21 + x27);
-{ uint64_t x33 = (uint64_t) (x32 >> 0x36);
-{ uint64_t x34 = ((uint64_t)x32 & 0x3fffffffffffff);
-{ uint64_t x35 = (x30 + x28);
-{ uint64_t x36 = (x35 >> 0x36);
-{ uint64_t x37 = (x35 & 0x3fffffffffffff);
-{ uint64_t x38 = (x33 + x23);
-{ uint64_t x39 = (x38 >> 0x36);
-{ uint64_t x40 = (x38 & 0x3fffffffffffff);
-{ uint64_t x41 = ((0x40000000000000 * x36) + x37);
-{ uint64_t x42 = (x41 >> 0x36);
-{ uint64_t x43 = (x41 & 0x3fffffffffffff);
-{ uint64_t x44 = ((x39 + x31) + x42);
-{ uint64_t x45 = (x44 >> 0x36);
-{ uint64_t x46 = (x44 & 0x3fffffffffffff);
-{ uint64_t x47 = (x34 + x42);
-{ uint64_t x48 = (x47 >> 0x36);
-{ uint64_t x49 = (x47 & 0x3fffffffffffff);
-out[0] = x45 + x43;
-out[1] = x46;
-out[2] = x48 + x40;
-out[3] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)(x7 + x8) * (x13 + x14)) - ((uint128_t)x7 * x13));
+ { uint128_t x17 = ((((uint128_t)(x5 + x9) * (x13 + x14)) + ((uint128_t)(x7 + x8) * (x11 + x15))) - (((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)));
+ { uint128_t x18 = (((uint128_t)(x5 + x9) * (x11 + x15)) - ((uint128_t)x5 * x11));
+ { uint128_t x19 = (((((uint128_t)x7 * x13) + ((uint128_t)x8 * x14)) + x18) + x16);
+ { uint128_t x20 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)));
+ { uint128_t x21 = ((((uint128_t)x5 * x11) + ((uint128_t)x9 * x15)) + x16);
+ { uint64_t x22 = (uint64_t) (x20 >> 0x36);
+ { uint64_t x23 = ((uint64_t)x20 & 0x3fffffffffffff);
+ { uint64_t x24 = (uint64_t) (x17 >> 0x36);
+ { uint64_t x25 = ((uint64_t)x17 & 0x3fffffffffffff);
+ { uint128_t x26 = (((uint128_t)0x40000000000000 * x24) + x25);
+ { uint64_t x27 = (uint64_t) (x26 >> 0x36);
+ { uint64_t x28 = ((uint64_t)x26 & 0x3fffffffffffff);
+ { uint128_t x29 = ((x22 + x19) + x27);
+ { uint64_t x30 = (uint64_t) (x29 >> 0x36);
+ { uint64_t x31 = ((uint64_t)x29 & 0x3fffffffffffff);
+ { uint128_t x32 = (x21 + x27);
+ { uint64_t x33 = (uint64_t) (x32 >> 0x36);
+ { uint64_t x34 = ((uint64_t)x32 & 0x3fffffffffffff);
+ { uint64_t x35 = (x30 + x28);
+ { uint64_t x36 = (x35 >> 0x36);
+ { uint64_t x37 = (x35 & 0x3fffffffffffff);
+ { uint64_t x38 = (x33 + x23);
+ { uint64_t x39 = (x38 >> 0x36);
+ { uint64_t x40 = (x38 & 0x3fffffffffffff);
+ { uint64_t x41 = ((0x40000000000000 * x36) + x37);
+ { uint64_t x42 = (x41 >> 0x36);
+ { uint64_t x43 = (x41 & 0x3fffffffffffff);
+ { uint64_t x44 = ((x39 + x31) + x42);
+ { uint64_t x45 = (x44 >> 0x36);
+ { uint64_t x46 = (x44 & 0x3fffffffffffff);
+ { uint64_t x47 = (x34 + x42);
+ { uint64_t x48 = (x47 >> 0x36);
+ { uint64_t x49 = (x47 & 0x3fffffffffffff);
+ out[0] = x49;
+ out[1] = (x48 + x40);
+ out[2] = x46;
+ out[3] = (x45 + x43);
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e216m2e108m1/fesquare.c b/src/Specific/solinas64_2e216m2e108m1/fesquare.c
index 90792a951..de35400a3 100644
--- a/src/Specific/solinas64_2e216m2e108m1/fesquare.c
+++ b/src/Specific/solinas64_2e216m2e108m1/fesquare.c
@@ -1,59 +1,45 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)(x4 + x5) * (x4 + x5)) - ((uint128_t)x4 * x4));
-{ uint128_t x8 = ((((uint128_t)(x2 + x6) * (x4 + x5)) + ((uint128_t)(x4 + x5) * (x2 + x6))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
-{ uint128_t x9 = (((uint128_t)(x2 + x6) * (x2 + x6)) - ((uint128_t)x2 * x2));
-{ uint128_t x10 = (((((uint128_t)x4 * x4) + ((uint128_t)x5 * x5)) + x9) + x7);
-{ uint128_t x11 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)));
-{ uint128_t x12 = ((((uint128_t)x2 * x2) + ((uint128_t)x6 * x6)) + x7);
-{ uint64_t x13 = (uint64_t) (x11 >> 0x36);
-{ uint64_t x14 = ((uint64_t)x11 & 0x3fffffffffffff);
-{ uint64_t x15 = (uint64_t) (x8 >> 0x36);
-{ uint64_t x16 = ((uint64_t)x8 & 0x3fffffffffffff);
-{ uint128_t x17 = (((uint128_t)0x40000000000000 * x15) + x16);
-{ uint64_t x18 = (uint64_t) (x17 >> 0x36);
-{ uint64_t x19 = ((uint64_t)x17 & 0x3fffffffffffff);
-{ uint128_t x20 = ((x13 + x10) + x18);
-{ uint64_t x21 = (uint64_t) (x20 >> 0x36);
-{ uint64_t x22 = ((uint64_t)x20 & 0x3fffffffffffff);
-{ uint128_t x23 = (x12 + x18);
-{ uint64_t x24 = (uint64_t) (x23 >> 0x36);
-{ uint64_t x25 = ((uint64_t)x23 & 0x3fffffffffffff);
-{ uint64_t x26 = (x21 + x19);
-{ uint64_t x27 = (x26 >> 0x36);
-{ uint64_t x28 = (x26 & 0x3fffffffffffff);
-{ uint64_t x29 = (x24 + x14);
-{ uint64_t x30 = (x29 >> 0x36);
-{ uint64_t x31 = (x29 & 0x3fffffffffffff);
-{ uint64_t x32 = ((0x40000000000000 * x27) + x28);
-{ uint64_t x33 = (x32 >> 0x36);
-{ uint64_t x34 = (x32 & 0x3fffffffffffff);
-{ uint64_t x35 = ((x30 + x22) + x33);
-{ uint64_t x36 = (x35 >> 0x36);
-{ uint64_t x37 = (x35 & 0x3fffffffffffff);
-{ uint64_t x38 = (x25 + x33);
-{ uint64_t x39 = (x38 >> 0x36);
-{ uint64_t x40 = (x38 & 0x3fffffffffffff);
-out[0] = x36 + x34;
-out[1] = x37;
-out[2] = x39 + x31;
-out[3] = x40;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)(x4 + x5) * (x4 + x5)) - ((uint128_t)x4 * x4));
+ { uint128_t x8 = ((((uint128_t)(x2 + x6) * (x4 + x5)) + ((uint128_t)(x4 + x5) * (x2 + x6))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+ { uint128_t x9 = (((uint128_t)(x2 + x6) * (x2 + x6)) - ((uint128_t)x2 * x2));
+ { uint128_t x10 = (((((uint128_t)x4 * x4) + ((uint128_t)x5 * x5)) + x9) + x7);
+ { uint128_t x11 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)));
+ { uint128_t x12 = ((((uint128_t)x2 * x2) + ((uint128_t)x6 * x6)) + x7);
+ { uint64_t x13 = (uint64_t) (x11 >> 0x36);
+ { uint64_t x14 = ((uint64_t)x11 & 0x3fffffffffffff);
+ { uint64_t x15 = (uint64_t) (x8 >> 0x36);
+ { uint64_t x16 = ((uint64_t)x8 & 0x3fffffffffffff);
+ { uint128_t x17 = (((uint128_t)0x40000000000000 * x15) + x16);
+ { uint64_t x18 = (uint64_t) (x17 >> 0x36);
+ { uint64_t x19 = ((uint64_t)x17 & 0x3fffffffffffff);
+ { uint128_t x20 = ((x13 + x10) + x18);
+ { uint64_t x21 = (uint64_t) (x20 >> 0x36);
+ { uint64_t x22 = ((uint64_t)x20 & 0x3fffffffffffff);
+ { uint128_t x23 = (x12 + x18);
+ { uint64_t x24 = (uint64_t) (x23 >> 0x36);
+ { uint64_t x25 = ((uint64_t)x23 & 0x3fffffffffffff);
+ { uint64_t x26 = (x21 + x19);
+ { uint64_t x27 = (x26 >> 0x36);
+ { uint64_t x28 = (x26 & 0x3fffffffffffff);
+ { uint64_t x29 = (x24 + x14);
+ { uint64_t x30 = (x29 >> 0x36);
+ { uint64_t x31 = (x29 & 0x3fffffffffffff);
+ { uint64_t x32 = ((0x40000000000000 * x27) + x28);
+ { uint64_t x33 = (x32 >> 0x36);
+ { uint64_t x34 = (x32 & 0x3fffffffffffff);
+ { uint64_t x35 = ((x30 + x22) + x33);
+ { uint64_t x36 = (x35 >> 0x36);
+ { uint64_t x37 = (x35 & 0x3fffffffffffff);
+ { uint64_t x38 = (x25 + x33);
+ { uint64_t x39 = (x38 >> 0x36);
+ { uint64_t x40 = (x38 & 0x3fffffffffffff);
+ out[0] = x40;
+ out[1] = (x39 + x31);
+ out[2] = x37;
+ out[3] = (x36 + x34);
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e216m2e108m1/freeze.c b/src/Specific/solinas64_2e216m2e108m1/freeze.c
index 61713cf58..cf49d9fff 100644
--- a/src/Specific/solinas64_2e216m2e108m1/freeze.c
+++ b/src/Specific/solinas64_2e216m2e108m1/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffffffff;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffffffff);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x3fffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x3ffffffffffffe);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x3fffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x3fffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x3fffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x3ffffffffffffe);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x3fffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e221m3/femul.c b/src/Specific/solinas64_2e221m3/femul.c
index d792750e6..57b54ec37 100644
--- a/src/Specific/solinas64_2e221m3/femul.c
+++ b/src/Specific/solinas64_2e221m3/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x3 * ((uint128_t)x8 * x14)));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x3 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x3 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x38);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x37);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x37);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x37);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
-{ uint64_t x31 = (x21 + (0x3 * x29));
-{ uint64_t x32 = (x31 >> 0x38);
-{ uint64_t x33 = (x31 & 0xffffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x37);
-{ uint64_t x36 = (x34 & 0x7fffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x3 * ((uint128_t)x8 * x14)));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x3 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x3 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x38);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x37);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x37);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x37);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
+ { uint64_t x31 = (x21 + (0x3 * x29));
+ { uint64_t x32 = (x31 >> 0x38);
+ { uint64_t x33 = (x31 & 0xffffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x37);
+ { uint64_t x36 = (x34 & 0x7fffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e221m3/fesquare.c b/src/Specific/solinas64_2e221m3/fesquare.c
index 6448c6975..5ea9ea22a 100644
--- a/src/Specific/solinas64_2e221m3/fesquare.c
+++ b/src/Specific/solinas64_2e221m3/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((uint128_t)x5 * x5)));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x38);
-{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint64_t x14 = (uint64_t) (x13 >> 0x37);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x37);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x37);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
-{ uint64_t x22 = (x12 + (0x3 * x20));
-{ uint64_t x23 = (x22 >> 0x38);
-{ uint64_t x24 = (x22 & 0xffffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x37);
-{ uint64_t x27 = (x25 & 0x7fffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((uint128_t)x5 * x5)));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x38);
+ { uint64_t x12 = ((uint64_t)x10 & 0xffffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint64_t x14 = (uint64_t) (x13 >> 0x37);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x37);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x37);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
+ { uint64_t x22 = (x12 + (0x3 * x20));
+ { uint64_t x23 = (x22 >> 0x38);
+ { uint64_t x24 = (x22 & 0xffffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x37);
+ { uint64_t x27 = (x25 & 0x7fffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e221m3/freeze.c b/src/Specific/solinas64_2e221m3/freeze.c
index eee23e3d9..62fee1b86 100644
--- a/src/Specific/solinas64_2e221m3/freeze.c
+++ b/src/Specific/solinas64_2e221m3/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffffd;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffffd);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x7fffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x7fffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x7fffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xfffffffffffffd);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7fffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x7fffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7fffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e222m117/femul.c b/src/Specific/solinas64_2e222m117/femul.c
index de30ee59e..db1b7c285 100644
--- a/src/Specific/solinas64_2e222m117/femul.c
+++ b/src/Specific/solinas64_2e222m117/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x75 * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x75 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x75 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint128_t x20 = (x19 >> 0x38);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint128_t x23 = (x22 >> 0x37);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint128_t x26 = (x25 >> 0x38);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x37);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
-{ uint128_t x31 = (x21 + ((uint128_t)0x75 * x29));
-{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x37);
-{ uint64_t x36 = (x34 & 0x7fffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x75 * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x75 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x75 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint128_t x20 = (x19 >> 0x38);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint128_t x23 = (x22 >> 0x37);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint128_t x26 = (x25 >> 0x38);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x37);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
+ { uint128_t x31 = (x21 + ((uint128_t)0x75 * x29));
+ { uint64_t x32 = (uint64_t) (x31 >> 0x38);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x37);
+ { uint64_t x36 = (x34 & 0x7fffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e222m117/fesquare.c b/src/Specific/solinas64_2e222m117/fesquare.c
index 36b9552e9..ffaeac9af 100644
--- a/src/Specific/solinas64_2e222m117/fesquare.c
+++ b/src/Specific/solinas64_2e222m117/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x75 * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x75 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x75 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint128_t x11 = (x10 >> 0x38);
-{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint128_t x14 = (x13 >> 0x37);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint128_t x17 = (x16 >> 0x38);
-{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x37);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
-{ uint128_t x22 = (x12 + ((uint128_t)0x75 * x20));
-{ uint64_t x23 = (uint64_t) (x22 >> 0x38);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x37);
-{ uint64_t x27 = (x25 & 0x7fffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x75 * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x75 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x75 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint128_t x11 = (x10 >> 0x38);
+ { uint64_t x12 = ((uint64_t)x10 & 0xffffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint128_t x14 = (x13 >> 0x37);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint128_t x17 = (x16 >> 0x38);
+ { uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x37);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
+ { uint128_t x22 = (x12 + ((uint128_t)0x75 * x20));
+ { uint64_t x23 = (uint64_t) (x22 >> 0x38);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x37);
+ { uint64_t x27 = (x25 & 0x7fffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e222m117/freeze.c b/src/Specific/solinas64_2e222m117/freeze.c
index 100ce761c..c2f7bf6da 100644
--- a/src/Specific/solinas64_2e222m117/freeze.c
+++ b/src/Specific/solinas64_2e222m117/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffff8b;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffff8b);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x7fffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0xffffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x7fffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0xffffffffffff8b);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x7fffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0xffffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x7fffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e224m2e96p1/freeze.c b/src/Specific/solinas64_2e224m2e96p1/freeze.c
index 576552ae8..eef7258b8 100644
--- a/src/Specific/solinas64_2e224m2e96p1/freeze.c
+++ b/src/Specific/solinas64_2e224m2e96p1/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = ℤ x9 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TZ 0x0;
-out[2] = x2;
-out[3] = 0x1;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, ℤ x9 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) Syntax.TZ) (0x0, Return x2, 0x1);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 56 Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0xffff0000000000);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0xffffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0xffffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint8_t x20 = ((uint8_t)x19 & 0x1);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0xffff0000000000);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0xffffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0xffffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e226m5/femul.c b/src/Specific/solinas64_2e226m5/femul.c
index 5caa29c35..d92664251 100644
--- a/src/Specific/solinas64_2e226m5/femul.c
+++ b/src/Specific/solinas64_2e226m5/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x5 * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x5 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x5 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint64_t x20 = (uint64_t) (x19 >> 0x39);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint128_t x23 = (x22 >> 0x38);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x39);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
-{ uint128_t x31 = (x21 + ((uint128_t)0x5 * x29));
-{ uint64_t x32 = (uint64_t) (x31 >> 0x39);
-{ uint64_t x33 = ((uint64_t)x31 & 0x1ffffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x38);
-{ uint64_t x36 = (x34 & 0xffffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x5 * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x5 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x5 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint64_t x20 = (uint64_t) (x19 >> 0x39);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint128_t x23 = (x22 >> 0x38);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x39);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+ { uint128_t x31 = (x21 + ((uint128_t)0x5 * x29));
+ { uint64_t x32 = (uint64_t) (x31 >> 0x39);
+ { uint64_t x33 = ((uint64_t)x31 & 0x1ffffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x38);
+ { uint64_t x36 = (x34 & 0xffffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e226m5/fesquare.c b/src/Specific/solinas64_2e226m5/fesquare.c
index eb4aa2ef1..bb9017957 100644
--- a/src/Specific/solinas64_2e226m5/fesquare.c
+++ b/src/Specific/solinas64_2e226m5/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x5 * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint64_t x11 = (uint64_t) (x10 >> 0x39);
-{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint128_t x14 = (x13 >> 0x38);
-{ uint64_t x15 = ((uint64_t)x13 & 0xffffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x39);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x38);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
-{ uint128_t x22 = (x12 + ((uint128_t)0x5 * x20));
-{ uint64_t x23 = (uint64_t) (x22 >> 0x39);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x38);
-{ uint64_t x27 = (x25 & 0xffffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x5 * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint64_t x11 = (uint64_t) (x10 >> 0x39);
+ { uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint128_t x14 = (x13 >> 0x38);
+ { uint64_t x15 = ((uint64_t)x13 & 0xffffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x39);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x38);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+ { uint128_t x22 = (x12 + ((uint128_t)0x5 * x20));
+ { uint64_t x23 = (uint64_t) (x22 >> 0x39);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x38);
+ { uint64_t x27 = (x25 & 0xffffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e226m5/freeze.c b/src/Specific/solinas64_2e226m5/freeze.c
index 86cb034c4..2f855e910 100644
--- a/src/Specific/solinas64_2e226m5/freeze.c
+++ b/src/Specific/solinas64_2e226m5/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffffffb;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffffffb);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0xffffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x1ffffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0xffffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x1fffffffffffffb);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0xffffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x1ffffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0xffffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e230m27/femul.c b/src/Specific/solinas64_2e230m27/femul.c
index c031d85b1..ffb6b77cd 100644
--- a/src/Specific/solinas64_2e230m27/femul.c
+++ b/src/Specific/solinas64_2e230m27/femul.c
@@ -1,46 +1,36 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
-{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
-{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x1b * (0x2 * ((uint128_t)x8 * x14))));
-{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1b * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
-{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x1b * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
-{ uint128_t x20 = (x19 >> 0x3a);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint128_t x23 = (x22 >> 0x39);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint128_t x26 = (x25 >> 0x3a);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x39);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
-{ uint128_t x31 = (x21 + ((uint128_t)0x1b * x29));
-{ uint64_t x32 = (uint64_t) (x31 >> 0x3a);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffffff);
-{ uint64_t x34 = (x32 + x24);
-{ uint64_t x35 = (x34 >> 0x39);
-{ uint64_t x36 = (x34 & 0x1ffffffffffffff);
-out[0] = x30;
-out[1] = x35 + x27;
-out[2] = x36;
-out[3] = x33;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void femul(uint64_t out[4], const uint64_t in1[4], const uint64_t in2[4]) {
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x14 = in2[3];
+ { const uint64_t x15 = in2[2];
+ { const uint64_t x13 = in2[1];
+ { const uint64_t x11 = in2[0];
+ { uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+ { uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x1b * (0x2 * ((uint128_t)x8 * x14))));
+ { uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1b * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+ { uint128_t x19 = (((uint128_t)x5 * x11) + (0x1b * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+ { uint128_t x20 = (x19 >> 0x3a);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint128_t x23 = (x22 >> 0x39);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint128_t x26 = (x25 >> 0x3a);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x39);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
+ { uint128_t x31 = (x21 + ((uint128_t)0x1b * x29));
+ { uint64_t x32 = (uint64_t) (x31 >> 0x3a);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffffff);
+ { uint64_t x34 = (x32 + x24);
+ { uint64_t x35 = (x34 >> 0x39);
+ { uint64_t x36 = (x34 & 0x1ffffffffffffff);
+ out[0] = x33;
+ out[1] = x36;
+ out[2] = (x35 + x27);
+ out[3] = x30;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e230m27/fesquare.c b/src/Specific/solinas64_2e230m27/fesquare.c
index 8e75c2873..daf47328b 100644
--- a/src/Specific/solinas64_2e230m27/fesquare.c
+++ b/src/Specific/solinas64_2e230m27/fesquare.c
@@ -1,46 +1,32 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
-{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1b * (0x2 * ((uint128_t)x5 * x5))));
-{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1b * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
-{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x1b * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
-{ uint128_t x11 = (x10 >> 0x3a);
-{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
-{ uint128_t x13 = (x11 + x9);
-{ uint128_t x14 = (x13 >> 0x39);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
-{ uint128_t x16 = (x14 + x8);
-{ uint128_t x17 = (x16 >> 0x3a);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
-{ uint128_t x19 = (x17 + x7);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x39);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
-{ uint128_t x22 = (x12 + ((uint128_t)0x1b * x20));
-{ uint64_t x23 = (uint64_t) (x22 >> 0x3a);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffffff);
-{ uint64_t x25 = (x23 + x15);
-{ uint64_t x26 = (x25 >> 0x39);
-{ uint64_t x27 = (x25 & 0x1ffffffffffffff);
-out[0] = x21;
-out[1] = x26 + x18;
-out[2] = x27;
-out[3] = x24;
-}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[4];
+static void fesquare(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+ { uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1b * (0x2 * ((uint128_t)x5 * x5))));
+ { uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1b * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+ { uint128_t x10 = (((uint128_t)x2 * x2) + (0x1b * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+ { uint128_t x11 = (x10 >> 0x3a);
+ { uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
+ { uint128_t x13 = (x11 + x9);
+ { uint128_t x14 = (x13 >> 0x39);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
+ { uint128_t x16 = (x14 + x8);
+ { uint128_t x17 = (x16 >> 0x3a);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+ { uint128_t x19 = (x17 + x7);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x39);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
+ { uint128_t x22 = (x12 + ((uint128_t)0x1b * x20));
+ { uint64_t x23 = (uint64_t) (x22 >> 0x3a);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffffff);
+ { uint64_t x25 = (x23 + x15);
+ { uint64_t x26 = (x25 >> 0x39);
+ { uint64_t x27 = (x25 & 0x1ffffffffffffff);
+ out[0] = x24;
+ out[1] = x27;
+ out[2] = (x26 + x18);
+ out[3] = x21;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e230m27/freeze.c b/src/Specific/solinas64_2e230m27/freeze.c
index a85af082f..ba5b9476e 100644
--- a/src/Specific/solinas64_2e230m27/freeze.c
+++ b/src/Specific/solinas64_2e230m27/freeze.c
@@ -1,25 +1,24 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x8;
-out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 58 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffffffffe5;;
+static void freeze(uint64_t out[4], const uint64_t in1[4]) {
+ { const uint64_t x5 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x8, uint8_t x9 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffffffffe5);
+ { uint64_t x11, uint8_t x12 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x9, Return x4, 0x1ffffffffffffff);
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x12, Return x6, 0x3ffffffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x5, 0x1ffffffffffffff);
+ { uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL);
+ { uint64_t x20 = (x19 & 0x3ffffffffffffe5);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x8, Return x20);
+ { uint64_t x24 = (x19 & 0x1ffffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x11, Return x24);
+ { uint64_t x28 = (x19 & 0x3ffffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x14, Return x28);
+ { uint64_t x32 = (x19 & 0x1ffffffffffffff);
+ { uint64_t x34, uint8_t _ = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x17, Return x32);
+ out[0] = x22;
+ out[1] = x26;
+ out[2] = x30;
+ out[3] = x34;
+ }}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e235m15/femul.c b/src/Specific/solinas64_2e235m15/femul.c
index 84bb1392e..0dcf869f7 100644
--- a/src/Specific/solinas64_2e235m15/femul.c
+++ b/src/Specific/solinas64_2e235m15/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0xf * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0xf * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0xf * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0xf * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
-{ uint64_t x25 = (uint64_t) (x24 >> 0x2f);
-{ uint64_t x26 = ((uint64_t)x24 & 0x7fffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x2f);
-{ uint64_t x29 = ((uint64_t)x27 & 0x7fffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x2f);
-{ uint64_t x32 = ((uint64_t)x30 & 0x7fffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x2f);
-{ uint64_t x35 = ((uint64_t)x33 & 0x7fffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x2f);
-{ uint64_t x38 = ((uint64_t)x36 & 0x7fffffffffff);
-{ uint64_t x39 = (x26 + (0xf * x37));
-{ uint64_t x40 = (x39 >> 0x2f);
-{ uint64_t x41 = (x39 & 0x7fffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x2f);
-{ uint64_t x44 = (x42 & 0x7fffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0xf * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0xf * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0xf * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0xf * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
+ { uint64_t x25 = (uint64_t) (x24 >> 0x2f);
+ { uint64_t x26 = ((uint64_t)x24 & 0x7fffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x2f);
+ { uint64_t x29 = ((uint64_t)x27 & 0x7fffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x2f);
+ { uint64_t x32 = ((uint64_t)x30 & 0x7fffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x2f);
+ { uint64_t x35 = ((uint64_t)x33 & 0x7fffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x2f);
+ { uint64_t x38 = ((uint64_t)x36 & 0x7fffffffffff);
+ { uint64_t x39 = (x26 + (0xf * x37));
+ { uint64_t x40 = (x39 >> 0x2f);
+ { uint64_t x41 = (x39 & 0x7fffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x2f);
+ { uint64_t x44 = (x42 & 0x7fffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e235m15/fesquare.c b/src/Specific/solinas64_2e235m15/fesquare.c
index edd311a16..20e3056c3 100644
--- a/src/Specific/solinas64_2e235m15/fesquare.c
+++ b/src/Specific/solinas64_2e235m15/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0xf * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0xf * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0xf * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
-{ uint64_t x14 = (uint64_t) (x13 >> 0x2f);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x2f);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x2f);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x2f);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x2f);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffff);
-{ uint64_t x28 = (x15 + (0xf * x26));
-{ uint64_t x29 = (x28 >> 0x2f);
-{ uint64_t x30 = (x28 & 0x7fffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x2f);
-{ uint64_t x33 = (x31 & 0x7fffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0xf * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0xf * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0xf * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
+ { uint64_t x14 = (uint64_t) (x13 >> 0x2f);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x2f);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x2f);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x2f);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x2f);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffff);
+ { uint64_t x28 = (x15 + (0xf * x26));
+ { uint64_t x29 = (x28 >> 0x2f);
+ { uint64_t x30 = (x28 & 0x7fffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x2f);
+ { uint64_t x33 = (x31 & 0x7fffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e235m15/freeze.c b/src/Specific/solinas64_2e235m15/freeze.c
index 6b21e6e37..867313133 100644
--- a/src/Specific/solinas64_2e235m15/freeze.c
+++ b/src/Specific/solinas64_2e235m15/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 47 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffff1;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffff1);
+ { uint64_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x11, Return x4, 0x7fffffffffff);
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x14, Return x6, 0x7fffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x8, 0x7fffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x7fffffffffff);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x7ffffffffff1);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x7fffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint64_t x33 = (x24 & 0x7fffffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint64_t x37 = (x24 & 0x7fffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint64_t x41 = (x24 & 0x7fffffffffff);
+ { uint64_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x22, Return x41);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e243m9/femul.c b/src/Specific/solinas64_2e243m9/femul.c
index 2a5ad5f77..1f3dd625a 100644
--- a/src/Specific/solinas64_2e243m9/femul.c
+++ b/src/Specific/solinas64_2e243m9/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
-{ uint128_t x25 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + (((uint128_t)x9 * x19) + ((0x2 * ((uint128_t)x11 * x17)) + ((uint128_t)x13 * x15))))) + (0x9 * (0x2 * ((uint128_t)x12 * x22))));
-{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x9 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
-{ uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0x9 * ((0x2 * ((uint128_t)x11 * x22)) + (((uint128_t)x13 * x23) + (0x2 * ((uint128_t)x12 * x21))))));
-{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x9 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
-{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x9 * ((0x2 * ((uint128_t)x7 * x22)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + (((uint128_t)x13 * x19) + (0x2 * ((uint128_t)x12 * x17))))))));
-{ uint64_t x30 = (uint64_t) (x29 >> 0x29);
-{ uint64_t x31 = ((uint64_t)x29 & 0x1ffffffffff);
-{ uint128_t x32 = (x30 + x28);
-{ uint64_t x33 = (uint64_t) (x32 >> 0x28);
-{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffff);
-{ uint128_t x35 = (x33 + x27);
-{ uint64_t x36 = (uint64_t) (x35 >> 0x29);
-{ uint64_t x37 = ((uint64_t)x35 & 0x1ffffffffff);
-{ uint128_t x38 = (x36 + x26);
-{ uint64_t x39 = (uint64_t) (x38 >> 0x28);
-{ uint64_t x40 = ((uint64_t)x38 & 0xffffffffff);
-{ uint128_t x41 = (x39 + x25);
-{ uint64_t x42 = (uint64_t) (x41 >> 0x29);
-{ uint64_t x43 = ((uint64_t)x41 & 0x1ffffffffff);
-{ uint128_t x44 = (x42 + x24);
-{ uint64_t x45 = (uint64_t) (x44 >> 0x28);
-{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffff);
-{ uint64_t x47 = (x31 + (0x9 * x45));
-{ uint64_t x48 = (x47 >> 0x29);
-{ uint64_t x49 = (x47 & 0x1ffffffffff);
-{ uint64_t x50 = (x48 + x34);
-{ uint64_t x51 = (x50 >> 0x28);
-{ uint64_t x52 = (x50 & 0xffffffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
+ { uint128_t x25 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + (((uint128_t)x9 * x19) + ((0x2 * ((uint128_t)x11 * x17)) + ((uint128_t)x13 * x15))))) + (0x9 * (0x2 * ((uint128_t)x12 * x22))));
+ { uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x9 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+ { uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0x9 * ((0x2 * ((uint128_t)x11 * x22)) + (((uint128_t)x13 * x23) + (0x2 * ((uint128_t)x12 * x21))))));
+ { uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x9 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+ { uint128_t x29 = (((uint128_t)x5 * x15) + (0x9 * ((0x2 * ((uint128_t)x7 * x22)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + (((uint128_t)x13 * x19) + (0x2 * ((uint128_t)x12 * x17))))))));
+ { uint64_t x30 = (uint64_t) (x29 >> 0x29);
+ { uint64_t x31 = ((uint64_t)x29 & 0x1ffffffffff);
+ { uint128_t x32 = (x30 + x28);
+ { uint64_t x33 = (uint64_t) (x32 >> 0x28);
+ { uint64_t x34 = ((uint64_t)x32 & 0xffffffffff);
+ { uint128_t x35 = (x33 + x27);
+ { uint64_t x36 = (uint64_t) (x35 >> 0x29);
+ { uint64_t x37 = ((uint64_t)x35 & 0x1ffffffffff);
+ { uint128_t x38 = (x36 + x26);
+ { uint64_t x39 = (uint64_t) (x38 >> 0x28);
+ { uint64_t x40 = ((uint64_t)x38 & 0xffffffffff);
+ { uint128_t x41 = (x39 + x25);
+ { uint64_t x42 = (uint64_t) (x41 >> 0x29);
+ { uint64_t x43 = ((uint64_t)x41 & 0x1ffffffffff);
+ { uint128_t x44 = (x42 + x24);
+ { uint64_t x45 = (uint64_t) (x44 >> 0x28);
+ { uint64_t x46 = ((uint64_t)x44 & 0xffffffffff);
+ { uint64_t x47 = (x31 + (0x9 * x45));
+ { uint64_t x48 = (x47 >> 0x29);
+ { uint64_t x49 = (x47 & 0x1ffffffffff);
+ { uint64_t x50 = (x48 + x34);
+ { uint64_t x51 = (x50 >> 0x28);
+ { uint64_t x52 = (x50 & 0xffffffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e243m9/fesquare.c b/src/Specific/solinas64_2e243m9/fesquare.c
index 5c9ea6279..c1d0d52c6 100644
--- a/src/Specific/solinas64_2e243m9/fesquare.c
+++ b/src/Specific/solinas64_2e243m9/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
-{ uint128_t x12 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x9 * (0x2 * ((uint128_t)x9 * x9))));
-{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
-{ uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * ((0x2 * ((uint128_t)x8 * x9)) + (((uint128_t)x10 * x10) + (0x2 * ((uint128_t)x9 * x8))))));
-{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
-{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x9)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + (0x2 * ((uint128_t)x9 * x4))))))));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x29);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffff);
-{ uint128_t x19 = (x17 + x15);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x28);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffff);
-{ uint128_t x22 = (x20 + x14);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x29);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffff);
-{ uint128_t x25 = (x23 + x13);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x28);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffff);
-{ uint128_t x28 = (x26 + x12);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x29);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffff);
-{ uint128_t x31 = (x29 + x11);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x28);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffff);
-{ uint64_t x34 = (x18 + (0x9 * x32));
-{ uint64_t x35 = (x34 >> 0x29);
-{ uint64_t x36 = (x34 & 0x1ffffffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x28);
-{ uint64_t x39 = (x37 & 0xffffffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
+ { uint128_t x12 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x9 * (0x2 * ((uint128_t)x9 * x9))));
+ { uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+ { uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * ((0x2 * ((uint128_t)x8 * x9)) + (((uint128_t)x10 * x10) + (0x2 * ((uint128_t)x9 * x8))))));
+ { uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+ { uint128_t x16 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x9)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + (0x2 * ((uint128_t)x9 * x4))))))));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x29);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffff);
+ { uint128_t x19 = (x17 + x15);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x28);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffff);
+ { uint128_t x22 = (x20 + x14);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x29);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffff);
+ { uint128_t x25 = (x23 + x13);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x28);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffff);
+ { uint128_t x28 = (x26 + x12);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x29);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffff);
+ { uint128_t x31 = (x29 + x11);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x28);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffff);
+ { uint64_t x34 = (x18 + (0x9 * x32));
+ { uint64_t x35 = (x34 >> 0x29);
+ { uint64_t x36 = (x34 & 0x1ffffffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x28);
+ { uint64_t x39 = (x37 & 0xffffffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e243m9/freeze.c b/src/Specific/solinas64_2e243m9/freeze.c
index 4d2465721..faa9b660e 100644
--- a/src/Specific/solinas64_2e243m9/freeze.c
+++ b/src/Specific/solinas64_2e243m9/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 41 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffff7;;
+static void freeze(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 41 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffff7);
+ { uint64_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x13, Return x4, 0xffffffffff);
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 41 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x16, Return x6, 0x1ffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x8, 0xffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 41 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x10, 0x1ffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x9, 0xffffffffff);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0x1fffffffff7);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 41 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint64_t x34 = (x29 & 0xffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint64_t x38 = (x29 & 0x1ffffffffff);
+ { uint64_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 41 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint64_t x42 = (x29 & 0xffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint64_t x46 = (x29 & 0x1ffffffffff);
+ { uint64_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 41 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint64_t x50 = (x29 & 0xffffffffff);
+ { uint64_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 40 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e251m9/femul.c b/src/Specific/solinas64_2e251m9/femul.c
index 8d52104c3..cced9ce9f 100644
--- a/src/Specific/solinas64_2e251m9/femul.c
+++ b/src/Specific/solinas64_2e251m9/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x9 * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x9 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x9 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x9 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
-{ uint64_t x25 = (uint64_t) (x24 >> 0x33);
-{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x32);
-{ uint64_t x29 = ((uint64_t)x27 & 0x3ffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x32);
-{ uint64_t x32 = ((uint64_t)x30 & 0x3ffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x32);
-{ uint64_t x35 = ((uint64_t)x33 & 0x3ffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x32);
-{ uint64_t x38 = ((uint64_t)x36 & 0x3ffffffffffff);
-{ uint64_t x39 = (x26 + (0x9 * x37));
-{ uint64_t x40 = (x39 >> 0x33);
-{ uint64_t x41 = (x39 & 0x7ffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x32);
-{ uint64_t x44 = (x42 & 0x3ffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x9 * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x9 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x9 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x9 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
+ { uint64_t x25 = (uint64_t) (x24 >> 0x33);
+ { uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x32);
+ { uint64_t x29 = ((uint64_t)x27 & 0x3ffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x32);
+ { uint64_t x32 = ((uint64_t)x30 & 0x3ffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x32);
+ { uint64_t x35 = ((uint64_t)x33 & 0x3ffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x32);
+ { uint64_t x38 = ((uint64_t)x36 & 0x3ffffffffffff);
+ { uint64_t x39 = (x26 + (0x9 * x37));
+ { uint64_t x40 = (x39 >> 0x33);
+ { uint64_t x41 = (x39 & 0x7ffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x32);
+ { uint64_t x44 = (x42 & 0x3ffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e251m9/fesquare.c b/src/Specific/solinas64_2e251m9/fesquare.c
index 8decd2e9c..77726bb6a 100644
--- a/src/Specific/solinas64_2e251m9/fesquare.c
+++ b/src/Specific/solinas64_2e251m9/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x9 * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
-{ uint64_t x14 = (uint64_t) (x13 >> 0x33);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x32);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x32);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x32);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x32);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
-{ uint64_t x28 = (x15 + (0x9 * x26));
-{ uint64_t x29 = (x28 >> 0x33);
-{ uint64_t x30 = (x28 & 0x7ffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x32);
-{ uint64_t x33 = (x31 & 0x3ffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x9 * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
+ { uint64_t x14 = (uint64_t) (x13 >> 0x33);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x32);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x32);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x32);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x32);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
+ { uint64_t x28 = (x15 + (0x9 * x26));
+ { uint64_t x29 = (x28 >> 0x33);
+ { uint64_t x30 = (x28 & 0x7ffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x32);
+ { uint64_t x33 = (x31 & 0x3ffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e251m9/freeze.c b/src/Specific/solinas64_2e251m9/freeze.c
index 01fb2e21f..e15e96dc5 100644
--- a/src/Specific/solinas64_2e251m9/freeze.c
+++ b/src/Specific/solinas64_2e251m9/freeze.c
@@ -1,26 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7fffffffffff7, &x10);
-out[0] = uint64_t x13;
-out[1] = uint8_t x14 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 x11;
-out[2] = x4;
-out[3] = 0x3ffffffffffff;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7fffffffffff7, &x10);
+ { uint64_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x11, Return x4, 0x3ffffffffffff);
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x14, Return x6, 0x3ffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x8, 0x3ffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x3ffffffffffff);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x7fffffffffff7);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+ { uint64_t x29 = (x24 & 0x3ffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint64_t x33 = (x24 & 0x3ffffffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint64_t x37 = (x24 & 0x3ffffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint64_t x41 = (x24 & 0x3ffffffffffff);
+ { uint64_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x22, Return x41);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e254m127x2e240m1/freeze.c b/src/Specific/solinas64_2e254m127x2e240m1/freeze.c
index 55d13c701..8bb96f6c3 100644
--- a/src/Specific/solinas64_2e254m127x2e240m1/freeze.c
+++ b/src/Specific/solinas64_2e254m127x2e240m1/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 43 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffff;;
+static void freeze(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffff);
+ { uint64_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x13, Return x4, 0x3ffffffffff);
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x16, Return x6, 0x3ffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x8, 0x7ffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x10, 0x3ffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x9, 0x3f80fffffff);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0x7ffffffffff);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint64_t x34 = (x29 & 0x3ffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint64_t x38 = (x29 & 0x3ffffffffff);
+ { uint64_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint64_t x42 = (x29 & 0x7ffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 43 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint64_t x46 = (x29 & 0x3ffffffffff);
+ { uint64_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint64_t x50 = (x29 & 0x3f80fffffff);
+ { uint64_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 42 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e255m19/femul.c b/src/Specific/solinas64_2e255m19/femul.c
index 6a3d29cd8..8094da7fc 100644
--- a/src/Specific/solinas64_2e255m19/femul.c
+++ b/src/Specific/solinas64_2e255m19/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
-{ uint64_t x25 = (uint64_t) (x24 >> 0x33);
-{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
-{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
-{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x33);
-{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x33);
-{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
-{ uint64_t x39 = (x26 + (0x13 * x37));
-{ uint64_t x40 = (x39 >> 0x33);
-{ uint64_t x41 = (x39 & 0x7ffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x33);
-{ uint64_t x44 = (x42 & 0x7ffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
+ { uint64_t x25 = (uint64_t) (x24 >> 0x33);
+ { uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x33);
+ { uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x33);
+ { uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x33);
+ { uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x33);
+ { uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+ { uint64_t x39 = (x26 + (0x13 * x37));
+ { uint64_t x40 = (x39 >> 0x33);
+ { uint64_t x41 = (x39 & 0x7ffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x33);
+ { uint64_t x44 = (x42 & 0x7ffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m19/fesquare.c b/src/Specific/solinas64_2e255m19/fesquare.c
index 628d9aaa7..909c3289c 100644
--- a/src/Specific/solinas64_2e255m19/fesquare.c
+++ b/src/Specific/solinas64_2e255m19/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
-{ uint64_t x14 = (uint64_t) (x13 >> 0x33);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x33);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
-{ uint64_t x28 = (x15 + (0x13 * x26));
-{ uint64_t x29 = (x28 >> 0x33);
-{ uint64_t x30 = (x28 & 0x7ffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = (x31 & 0x7ffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
+ { uint64_t x14 = (uint64_t) (x13 >> 0x33);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x33);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x33);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x33);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x33);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+ { uint64_t x28 = (x15 + (0x13 * x26));
+ { uint64_t x29 = (x28 >> 0x33);
+ { uint64_t x30 = (x28 & 0x7ffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x33);
+ { uint64_t x33 = (x31 & 0x7ffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m19/freeze.c b/src/Specific/solinas64_2e255m19/freeze.c
index 1cebfd785..b91d0e1f1 100644
--- a/src/Specific/solinas64_2e255m19/freeze.c
+++ b/src/Specific/solinas64_2e255m19/freeze.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10);
-{ uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
-{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
-{ uint64_t x25 = (x24 & 0x7ffffffffffed);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
-{ uint64_t x29 = (x24 & 0x7ffffffffffff);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
-{ uint64_t x33 = (x24 & 0x7ffffffffffff);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
-{ uint64_t x37 = (x24 & 0x7ffffffffffff);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
-{ uint64_t x41 = (x24 & 0x7ffffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10);
+ { uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x7ffffffffffed);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+ { uint64_t x29 = (x24 & 0x7ffffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0x7ffffffffffff);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0x7ffffffffffff);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7ffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c b/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c
index 72c26597d..17aa086fd 100644
--- a/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c
@@ -1,64 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (((uint128_t)x10 * x18) + ((0x2 * ((uint128_t)x10 * x18)) + (0x10 * ((uint128_t)x10 * x18)))));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + ((((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)) + ((0x2 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))) + (0x10 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + ((((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))) + ((0x2 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))) + (0x10 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + ((((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))) + ((0x2 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))) + (0x10 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))))));
-{ uint64_t x25 = (uint64_t) (x20 >> 0x33);
-{ uint64_t x26 = ((uint64_t)x20 & 0x7ffffffffffff);
-{ uint128_t x27 = (((uint128_t)0x8000000000000 * x25) + x26);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
-{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
-{ uint128_t x30 = (((uint128_t)0x8000000000000 * x28) + x29);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
-{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
-{ uint128_t x33 = (((uint128_t)0x8000000000000 * x31) + x32);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x33);
-{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
-{ uint128_t x36 = (x24 + (x34 + ((0x2 * x34) + (0x10 * x34))));
-{ uint64_t x37 = (uint64_t) (x36 >> 0x33);
-{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
-{ uint128_t x39 = (x37 + x23);
-{ uint64_t x40 = (uint64_t) (x39 >> 0x33);
-{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
-{ uint128_t x42 = (x40 + x22);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x33);
-{ uint64_t x44 = ((uint64_t)x42 & 0x7ffffffffffff);
-{ uint128_t x45 = (x43 + x21);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x33);
-{ uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffff);
-{ uint64_t x48 = (x46 + x35);
-{ uint64_t x49 = (x48 >> 0x33);
-{ uint64_t x50 = (x48 & 0x7ffffffffffff);
-{ uint64_t x51 = (x38 + (x49 + ((0x2 * x49) + (0x10 * x49))));
-{ uint64_t x52 = (x51 >> 0x33);
-{ uint64_t x53 = (x51 & 0x7ffffffffffff);
-{ uint64_t x54 = (x53 >> 0x33);
-{ uint64_t x55 = (x53 & 0x7ffffffffffff);
-{ uint64_t x56 = (x55 >> 0x33);
-{ uint64_t x57 = (x55 & 0x7ffffffffffff);
-out[0] = x50;
-out[1] = x47;
-out[2] = x44;
-out[3] = x56 + x54 + x52 + x41;
-out[4] = x57;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (((uint128_t)x10 * x18) + ((0x2 * ((uint128_t)x10 * x18)) + (0x10 * ((uint128_t)x10 * x18)))));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + ((((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)) + ((0x2 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))) + (0x10 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + ((((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))) + ((0x2 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))) + (0x10 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + ((((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))) + ((0x2 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))) + (0x10 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))))));
+ { uint64_t x25 = (uint64_t) (x20 >> 0x33);
+ { uint64_t x26 = ((uint64_t)x20 & 0x7ffffffffffff);
+ { uint128_t x27 = (((uint128_t)0x8000000000000 * x25) + x26);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x33);
+ { uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+ { uint128_t x30 = (((uint128_t)0x8000000000000 * x28) + x29);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x33);
+ { uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+ { uint128_t x33 = (((uint128_t)0x8000000000000 * x31) + x32);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x33);
+ { uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+ { uint128_t x36 = (x24 + (x34 + ((0x2 * x34) + (0x10 * x34))));
+ { uint64_t x37 = (uint64_t) (x36 >> 0x33);
+ { uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+ { uint128_t x39 = (x37 + x23);
+ { uint64_t x40 = (uint64_t) (x39 >> 0x33);
+ { uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
+ { uint128_t x42 = (x40 + x22);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x33);
+ { uint64_t x44 = ((uint64_t)x42 & 0x7ffffffffffff);
+ { uint128_t x45 = (x43 + x21);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x33);
+ { uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffff);
+ { uint64_t x48 = (x46 + x35);
+ { uint64_t x49 = (x48 >> 0x33);
+ { uint64_t x50 = (x48 & 0x7ffffffffffff);
+ { uint64_t x51 = (x38 + (x49 + ((0x2 * x49) + (0x10 * x49))));
+ { uint64_t x52 = (x51 >> 0x33);
+ { uint64_t x53 = (x51 & 0x7ffffffffffff);
+ { uint64_t x54 = (x53 >> 0x33);
+ { uint64_t x55 = (x53 & 0x7ffffffffffff);
+ { uint64_t x56 = (x55 >> 0x33);
+ { uint64_t x57 = (x55 & 0x7ffffffffffff);
+ out[0] = x57;
+ out[1] = (x56 + (x54 + (x52 + x41)));
+ out[2] = x44;
+ out[3] = x47;
+ out[4] = x50;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c b/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c
index 29c0cc597..711c6370b 100644
--- a/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c
@@ -1,64 +1,51 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x7 * x7) + ((0x2 * ((uint128_t)x7 * x7)) + (0x10 * ((uint128_t)x7 * x7)))));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + ((((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)) + ((0x2 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))) + (0x10 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))) + ((0x2 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))) + (0x10 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + ((((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))) + ((0x2 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))) + (0x10 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))))));
-{ uint64_t x14 = (uint64_t) (x9 >> 0x33);
-{ uint64_t x15 = ((uint64_t)x9 & 0x7ffffffffffff);
-{ uint128_t x16 = (((uint128_t)0x8000000000000 * x14) + x15);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
-{ uint128_t x19 = (((uint128_t)0x8000000000000 * x17) + x18);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
-{ uint128_t x22 = (((uint128_t)0x8000000000000 * x20) + x21);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
-{ uint128_t x25 = (x13 + (x23 + ((0x2 * x23) + (0x10 * x23))));
-{ uint64_t x26 = (uint64_t) (x25 >> 0x33);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
-{ uint128_t x28 = (x26 + x12);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x33);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
-{ uint128_t x31 = (x29 + x11);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x33);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
-{ uint128_t x34 = (x32 + x10);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x33);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
-{ uint64_t x37 = (x35 + x24);
-{ uint64_t x38 = (x37 >> 0x33);
-{ uint64_t x39 = (x37 & 0x7ffffffffffff);
-{ uint64_t x40 = (x27 + (x38 + ((0x2 * x38) + (0x10 * x38))));
-{ uint64_t x41 = (x40 >> 0x33);
-{ uint64_t x42 = (x40 & 0x7ffffffffffff);
-{ uint64_t x43 = (x42 >> 0x33);
-{ uint64_t x44 = (x42 & 0x7ffffffffffff);
-{ uint64_t x45 = (x44 >> 0x33);
-{ uint64_t x46 = (x44 & 0x7ffffffffffff);
-out[0] = x39;
-out[1] = x36;
-out[2] = x33;
-out[3] = x45 + x43 + x41 + x30;
-out[4] = x46;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x7 * x7) + ((0x2 * ((uint128_t)x7 * x7)) + (0x10 * ((uint128_t)x7 * x7)))));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + ((((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)) + ((0x2 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))) + (0x10 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))) + ((0x2 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))) + (0x10 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + ((((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))) + ((0x2 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))) + (0x10 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))))));
+ { uint64_t x14 = (uint64_t) (x9 >> 0x33);
+ { uint64_t x15 = ((uint64_t)x9 & 0x7ffffffffffff);
+ { uint128_t x16 = (((uint128_t)0x8000000000000 * x14) + x15);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x33);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+ { uint128_t x19 = (((uint128_t)0x8000000000000 * x17) + x18);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x33);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+ { uint128_t x22 = (((uint128_t)0x8000000000000 * x20) + x21);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x33);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+ { uint128_t x25 = (x13 + (x23 + ((0x2 * x23) + (0x10 * x23))));
+ { uint64_t x26 = (uint64_t) (x25 >> 0x33);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+ { uint128_t x28 = (x26 + x12);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x33);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
+ { uint128_t x31 = (x29 + x11);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x33);
+ { uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+ { uint128_t x34 = (x32 + x10);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x33);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+ { uint64_t x37 = (x35 + x24);
+ { uint64_t x38 = (x37 >> 0x33);
+ { uint64_t x39 = (x37 & 0x7ffffffffffff);
+ { uint64_t x40 = (x27 + (x38 + ((0x2 * x38) + (0x10 * x38))));
+ { uint64_t x41 = (x40 >> 0x33);
+ { uint64_t x42 = (x40 & 0x7ffffffffffff);
+ { uint64_t x43 = (x42 >> 0x33);
+ { uint64_t x44 = (x42 & 0x7ffffffffffff);
+ { uint64_t x45 = (x44 >> 0x33);
+ { uint64_t x46 = (x44 & 0x7ffffffffffff);
+ out[0] = x46;
+ out[1] = (x45 + (x43 + (x41 + x30)));
+ out[2] = x33;
+ out[3] = x36;
+ out[4] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c b/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c
index 1cebfd785..b91d0e1f1 100644
--- a/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10);
-{ uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
-{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
-{ uint64_t x25 = (x24 & 0x7ffffffffffed);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
-{ uint64_t x29 = (x24 & 0x7ffffffffffff);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
-{ uint64_t x33 = (x24 & 0x7ffffffffffff);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
-{ uint64_t x37 = (x24 & 0x7ffffffffffff);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
-{ uint64_t x41 = (x24 & 0x7ffffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10);
+ { uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x7ffffffffffed);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+ { uint64_t x29 = (x24 & 0x7ffffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0x7ffffffffffff);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0x7ffffffffffff);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7ffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m765/femul.c b/src/Specific/solinas64_2e255m765/femul.c
index 010cb8eba..57eb9764f 100644
--- a/src/Specific/solinas64_2e255m765/femul.c
+++ b/src/Specific/solinas64_2e255m765/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x2fd * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x2fd * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x2fd * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x2fd * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
-{ uint128_t x25 = (x24 >> 0x33);
-{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint128_t x28 = (x27 >> 0x33);
-{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint128_t x31 = (x30 >> 0x33);
-{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint128_t x34 = (x33 >> 0x33);
-{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x33);
-{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
-{ uint128_t x39 = (x26 + ((uint128_t)0x2fd * x37));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x33);
-{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x33);
-{ uint64_t x44 = (x42 & 0x7ffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x2fd * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x2fd * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x2fd * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x2fd * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
+ { uint128_t x25 = (x24 >> 0x33);
+ { uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint128_t x28 = (x27 >> 0x33);
+ { uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint128_t x31 = (x30 >> 0x33);
+ { uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint128_t x34 = (x33 >> 0x33);
+ { uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x33);
+ { uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+ { uint128_t x39 = (x26 + ((uint128_t)0x2fd * x37));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x33);
+ { uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x33);
+ { uint64_t x44 = (x42 & 0x7ffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m765/fesquare.c b/src/Specific/solinas64_2e255m765/fesquare.c
index 7a3de6799..31d7686aa 100644
--- a/src/Specific/solinas64_2e255m765/fesquare.c
+++ b/src/Specific/solinas64_2e255m765/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x2fd * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x2fd * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x2fd * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x2fd * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
-{ uint128_t x14 = (x13 >> 0x33);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint128_t x17 = (x16 >> 0x33);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint128_t x20 = (x19 >> 0x33);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint128_t x23 = (x22 >> 0x33);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x33);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
-{ uint128_t x28 = (x15 + ((uint128_t)0x2fd * x26));
-{ uint64_t x29 = (uint64_t) (x28 >> 0x33);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = (x31 & 0x7ffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x2fd * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x2fd * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x2fd * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x2fd * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
+ { uint128_t x14 = (x13 >> 0x33);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint128_t x17 = (x16 >> 0x33);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint128_t x20 = (x19 >> 0x33);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint128_t x23 = (x22 >> 0x33);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x33);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+ { uint128_t x28 = (x15 + ((uint128_t)0x2fd * x26));
+ { uint64_t x29 = (uint64_t) (x28 >> 0x33);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x33);
+ { uint64_t x33 = (x31 & 0x7ffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e255m765/freeze.c b/src/Specific/solinas64_2e255m765/freeze.c
index 0ae10d49e..043900992 100644
--- a/src/Specific/solinas64_2e255m765/freeze.c
+++ b/src/Specific/solinas64_2e255m765/freeze.c
@@ -1,42 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7fffffffffd03, &x10);
-{ uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
-{ uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
-{ uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
-{ uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
-{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
-{ uint64_t x25 = (x24 & 0x7fffffffffd03);
-{ uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
-{ uint64_t x29 = (x24 & 0x7ffffffffffff);
-{ uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
-{ uint64_t x33 = (x24 & 0x7ffffffffffff);
-{ uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
-{ uint64_t x37 = (x24 & 0x7ffffffffffff);
-{ uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
-{ uint64_t x41 = (x24 & 0x7ffffffffffff);
-{ uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
-out[0] = x43;
-out[1] = x39;
-out[2] = x35;
-out[3] = x31;
-out[4] = x27;
-}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7fffffffffd03, &x10);
+ { uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x7fffffffffd03);
+ { uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+ { uint64_t x29 = (x24 & 0x7ffffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0x7ffffffffffff);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0x7ffffffffffff);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7ffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e256m189/femul.c b/src/Specific/solinas64_2e256m189/femul.c
index 4c4e7a2e4..aa2dbbea0 100644
--- a/src/Specific/solinas64_2e256m189/femul.c
+++ b/src/Specific/solinas64_2e256m189/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0xbd * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0xbd * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0xbd * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0xbd * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
-{ uint128_t x25 = (x24 >> 0x34);
-{ uint64_t x26 = ((uint64_t)x24 & 0xfffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
-{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
-{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x33);
-{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x33);
-{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
-{ uint128_t x39 = (x26 + ((uint128_t)0xbd * x37));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x34);
-{ uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x33);
-{ uint64_t x44 = (x42 & 0x7ffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0xbd * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0xbd * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0xbd * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0xbd * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
+ { uint128_t x25 = (x24 >> 0x34);
+ { uint64_t x26 = ((uint64_t)x24 & 0xfffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x33);
+ { uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x33);
+ { uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x33);
+ { uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x33);
+ { uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+ { uint128_t x39 = (x26 + ((uint128_t)0xbd * x37));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x34);
+ { uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x33);
+ { uint64_t x44 = (x42 & 0x7ffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e256m189/fesquare.c b/src/Specific/solinas64_2e256m189/fesquare.c
index cc8808dc7..7eaafeefe 100644
--- a/src/Specific/solinas64_2e256m189/fesquare.c
+++ b/src/Specific/solinas64_2e256m189/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xbd * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xbd * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xbd * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0xbd * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
-{ uint128_t x14 = (x13 >> 0x34);
-{ uint64_t x15 = ((uint64_t)x13 & 0xfffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x33);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
-{ uint128_t x28 = (x15 + ((uint128_t)0xbd * x26));
-{ uint64_t x29 = (uint64_t) (x28 >> 0x34);
-{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = (x31 & 0x7ffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xbd * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xbd * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xbd * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0xbd * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
+ { uint128_t x14 = (x13 >> 0x34);
+ { uint64_t x15 = ((uint64_t)x13 & 0xfffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x33);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x33);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x33);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x33);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+ { uint128_t x28 = (x15 + ((uint128_t)0xbd * x26));
+ { uint64_t x29 = (uint64_t) (x28 >> 0x34);
+ { uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x33);
+ { uint64_t x33 = (x31 & 0x7ffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e256m189/freeze.c b/src/Specific/solinas64_2e256m189/freeze.c
index 2278942df..174b1619d 100644
--- a/src/Specific/solinas64_2e256m189/freeze.c
+++ b/src/Specific/solinas64_2e256m189/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffff43;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffff43);
+ { uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0xfffffffffff43);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x7ffffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0x7ffffffffffff);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0x7ffffffffffff);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7ffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c b/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c
index 1c76aa42f..3901aa11d 100644
--- a/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c
+++ b/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c
@@ -1,25 +1,28 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffff;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffff);
+ { uint64_t x13; ℤ x14 = _subborrow_u51ℤ(x11, x4, 0xfffffffffff, &x13);
+ { uint64_t x16; ℤ x17 = _subborrow_u51ℤ(x14, x6, 0x0, &x16);
+ { uint64_t x19; ℤ x20 = _subborrow_u51ℤ(x17, x8, 0x4000000000, &x19);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 51 Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x7fffffff80000);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0xfffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0xfffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x34; uint8_t x35 = _addcarryx_u51(x32, x16, 0x0, &x34);
+ { uint64_t x36 = (x24 & 0x4000000000);
+ { uint64_t x38; uint8_t x39 = _addcarryx_u51(x35, x19, x36, &x38);
+ { uint64_t x40 = (x24 & 0x7fffffff80000);
+ { uint64_t x42; uint8_t _ = _addcarryx_u51(x39, x22, x40, &x42);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x34;
+ out[3] = x38;
+ out[4] = x42;
+ }}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m2e32m977/femul.c b/src/Specific/solinas64_2e256m2e32m977/femul.c
index b6759718f..ac6b087a0 100644
--- a/src/Specific/solinas64_2e256m2e32m977/femul.c
+++ b/src/Specific/solinas64_2e256m2e32m977/femul.c
@@ -1,59 +1,51 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
-{ ℤ x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) +ℤ ((0x3d1 * ((uint128_t)x10 * x18)) +ℤ (0x100000000 *ℤ ((uint128_t)x10 * x18))));
-{ ℤ x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) +ℤ ((0x3d1 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))) +ℤ (0x100000000 *ℤ (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))));
-{ ℤ x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) +ℤ ((0x3d1 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))) +ℤ (0x100000000 *ℤ (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))));
-{ ℤ x24 = (((uint128_t)x5 * x13) +ℤ ((0x3d1 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))) +ℤ (0x100000000 *ℤ ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))));
-{ uint64_t x25 = (uint64_t) (x20 >> 0x33);
-{ uint64_t x26 = ((uint64_t)x20 & 0x7ffffffffffff);
-{ uint128_t x27 = (((uint128_t)0x8000000000000 * x25) + x26);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
-{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
-{ uint128_t x30 = (((uint128_t)0x8000000000000 * x28) + x29);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
-{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
-{ ℤ x33 = (x24 +ℤ (((uint128_t)0x3d1 * x31) + ((uint128_t)0x100000000 * x31)));
-{ uint128_t x34 = (x33 >> 0x34);
-{ uint64_t x35 = (x33 & 0xfffffffffffff);
-{ ℤ x36 = (x34 +ℤ x23);
-{ uint128_t x37 = (x36 >> 0x33);
-{ uint64_t x38 = (x36 & 0x7ffffffffffff);
-{ ℤ x39 = (x37 +ℤ x22);
-{ uint128_t x40 = (x39 >> 0x33);
-{ uint64_t x41 = (x39 & 0x7ffffffffffff);
-{ ℤ x42 = (x40 +ℤ x21);
-{ uint128_t x43 = (x42 >> 0x33);
-{ uint64_t x44 = (x42 & 0x7ffffffffffff);
-{ uint128_t x45 = (x43 + x32);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x33);
-{ uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffff);
-{ uint128_t x48 = (x35 + ((0x3d1 * x46) + ((uint128_t)0x100000000 * x46)));
-{ uint64_t x49 = (uint64_t) (x48 >> 0x34);
-{ uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffff);
-{ uint64_t x51 = (x50 >> 0x34);
-{ uint64_t x52 = (x50 & 0xfffffffffffff);
-out[0] = x47;
-out[1] = x44;
-out[2] = x41;
-out[3] = x51 + x49 + x38;
-out[4] = x52;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
+ { ℤ x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) +ℤ ((0x3d1 * ((uint128_t)x10 * x18)) +ℤ (0x100000000 *ℤ ((uint128_t)x10 * x18))));
+ { ℤ x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) +ℤ ((0x3d1 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))) +ℤ (0x100000000 *ℤ (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))));
+ { ℤ x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) +ℤ ((0x3d1 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))) +ℤ (0x100000000 *ℤ (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))));
+ { ℤ x24 = (((uint128_t)x5 * x13) +ℤ ((0x3d1 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))) +ℤ (0x100000000 *ℤ ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))));
+ { uint64_t x25 = (uint64_t) (x20 >> 0x33);
+ { uint64_t x26 = ((uint64_t)x20 & 0x7ffffffffffff);
+ { uint128_t x27 = (((uint128_t)0x8000000000000 * x25) + x26);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x33);
+ { uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+ { uint128_t x30 = (((uint128_t)0x8000000000000 * x28) + x29);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x33);
+ { uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+ { ℤ x33 = (x24 +ℤ (((uint128_t)0x3d1 * x31) + ((uint128_t)0x100000000 * x31)));
+ { uint128_t x34 = (x33 >> 0x34);
+ { uint64_t x35 = (x33 & 0xfffffffffffff);
+ { ℤ x36 = (x34 +ℤ x23);
+ { uint128_t x37 = (x36 >> 0x33);
+ { uint64_t x38 = (x36 & 0x7ffffffffffff);
+ { ℤ x39 = (x37 +ℤ x22);
+ { uint128_t x40 = (x39 >> 0x33);
+ { uint64_t x41 = (x39 & 0x7ffffffffffff);
+ { ℤ x42 = (x40 +ℤ x21);
+ { uint128_t x43 = (x42 >> 0x33);
+ { uint64_t x44 = (x42 & 0x7ffffffffffff);
+ { uint128_t x45 = (x43 + x32);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x33);
+ { uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffff);
+ { uint128_t x48 = (x35 + ((0x3d1 * x46) + ((uint128_t)0x100000000 * x46)));
+ { uint64_t x49 = (uint64_t) (x48 >> 0x34);
+ { uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffff);
+ { uint64_t x51 = (x50 >> 0x34);
+ { uint64_t x52 = (x50 & 0xfffffffffffff);
+ out[0] = x52;
+ out[1] = (x51 + (x49 + x38));
+ out[2] = x41;
+ out[3] = x44;
+ out[4] = x47;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e256m2e32m977/fesquare.c b/src/Specific/solinas64_2e256m2e32m977/fesquare.c
index 5a7485a7c..23fe9a652 100644
--- a/src/Specific/solinas64_2e256m2e32m977/fesquare.c
+++ b/src/Specific/solinas64_2e256m2e32m977/fesquare.c
@@ -1,59 +1,46 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
-{ ℤ x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) +ℤ ((0x3d1 * ((uint128_t)x7 * x7)) +ℤ (0x100000000 *ℤ ((uint128_t)x7 * x7))));
-{ ℤ x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) +ℤ ((0x3d1 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))) +ℤ (0x100000000 *ℤ (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))));
-{ ℤ x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) +ℤ ((0x3d1 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))) +ℤ (0x100000000 *ℤ (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))));
-{ ℤ x13 = (((uint128_t)x2 * x2) +ℤ ((0x3d1 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))) +ℤ (0x100000000 *ℤ ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))));
-{ uint64_t x14 = (uint64_t) (x9 >> 0x33);
-{ uint64_t x15 = ((uint64_t)x9 & 0x7ffffffffffff);
-{ uint128_t x16 = (((uint128_t)0x8000000000000 * x14) + x15);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
-{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
-{ uint128_t x19 = (((uint128_t)0x8000000000000 * x17) + x18);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
-{ ℤ x22 = (x13 +ℤ (((uint128_t)0x3d1 * x20) + ((uint128_t)0x100000000 * x20)));
-{ uint128_t x23 = (x22 >> 0x34);
-{ uint64_t x24 = (x22 & 0xfffffffffffff);
-{ ℤ x25 = (x23 +ℤ x12);
-{ uint128_t x26 = (x25 >> 0x33);
-{ uint64_t x27 = (x25 & 0x7ffffffffffff);
-{ ℤ x28 = (x26 +ℤ x11);
-{ uint128_t x29 = (x28 >> 0x33);
-{ uint64_t x30 = (x28 & 0x7ffffffffffff);
-{ ℤ x31 = (x29 +ℤ x10);
-{ uint128_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = (x31 & 0x7ffffffffffff);
-{ uint128_t x34 = (x32 + x21);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x33);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
-{ uint128_t x37 = (x24 + ((0x3d1 * x35) + ((uint128_t)0x100000000 * x35)));
-{ uint64_t x38 = (uint64_t) (x37 >> 0x34);
-{ uint64_t x39 = ((uint64_t)x37 & 0xfffffffffffff);
-{ uint64_t x40 = (x39 >> 0x34);
-{ uint64_t x41 = (x39 & 0xfffffffffffff);
-out[0] = x36;
-out[1] = x33;
-out[2] = x30;
-out[3] = x40 + x38 + x27;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
+ { ℤ x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) +ℤ ((0x3d1 * ((uint128_t)x7 * x7)) +ℤ (0x100000000 *ℤ ((uint128_t)x7 * x7))));
+ { ℤ x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) +ℤ ((0x3d1 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))) +ℤ (0x100000000 *ℤ (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))));
+ { ℤ x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) +ℤ ((0x3d1 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))) +ℤ (0x100000000 *ℤ (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))));
+ { ℤ x13 = (((uint128_t)x2 * x2) +ℤ ((0x3d1 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))) +ℤ (0x100000000 *ℤ ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))));
+ { uint64_t x14 = (uint64_t) (x9 >> 0x33);
+ { uint64_t x15 = ((uint64_t)x9 & 0x7ffffffffffff);
+ { uint128_t x16 = (((uint128_t)0x8000000000000 * x14) + x15);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x33);
+ { uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+ { uint128_t x19 = (((uint128_t)0x8000000000000 * x17) + x18);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x33);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+ { ℤ x22 = (x13 +ℤ (((uint128_t)0x3d1 * x20) + ((uint128_t)0x100000000 * x20)));
+ { uint128_t x23 = (x22 >> 0x34);
+ { uint64_t x24 = (x22 & 0xfffffffffffff);
+ { ℤ x25 = (x23 +ℤ x12);
+ { uint128_t x26 = (x25 >> 0x33);
+ { uint64_t x27 = (x25 & 0x7ffffffffffff);
+ { ℤ x28 = (x26 +ℤ x11);
+ { uint128_t x29 = (x28 >> 0x33);
+ { uint64_t x30 = (x28 & 0x7ffffffffffff);
+ { ℤ x31 = (x29 +ℤ x10);
+ { uint128_t x32 = (x31 >> 0x33);
+ { uint64_t x33 = (x31 & 0x7ffffffffffff);
+ { uint128_t x34 = (x32 + x21);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x33);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+ { uint128_t x37 = (x24 + ((0x3d1 * x35) + ((uint128_t)0x100000000 * x35)));
+ { uint64_t x38 = (uint64_t) (x37 >> 0x34);
+ { uint64_t x39 = ((uint64_t)x37 & 0xfffffffffffff);
+ { uint64_t x40 = (x39 >> 0x34);
+ { uint64_t x41 = (x39 & 0xfffffffffffff);
+ out[0] = x41;
+ out[1] = (x40 + (x38 + x27));
+ out[2] = x30;
+ out[3] = x33;
+ out[4] = x36;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e256m2e32m977/freeze.c b/src/Specific/solinas64_2e256m2e32m977/freeze.c
index 5c8088e2e..e6380125f 100644
--- a/src/Specific/solinas64_2e256m2e32m977/freeze.c
+++ b/src/Specific/solinas64_2e256m2e32m977/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffefffffc2f;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffefffffc2f);
+ { uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0xffffefffffc2f);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x7ffffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0x7ffffffffffff);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0x7ffffffffffff);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7ffffffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m88x2e240m1/freeze.c b/src/Specific/solinas64_2e256m88x2e240m1/freeze.c
index 1c76aa42f..a4fac224e 100644
--- a/src/Specific/solinas64_2e256m88x2e240m1/freeze.c
+++ b/src/Specific/solinas64_2e256m88x2e240m1/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffff;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffff);
+ { uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+ { uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+ { uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7fd3fffffffff, &x22);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0xfffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x7ffffffffffff);
+ { uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+ { uint64_t x33 = (x24 & 0x7ffffffffffff);
+ { uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+ { uint64_t x37 = (x24 & 0x7ffffffffffff);
+ { uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+ { uint64_t x41 = (x24 & 0x7fd3fffffffff);
+ { uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e266m3/femul.c b/src/Specific/solinas64_2e266m3/femul.c
index 8deb45986..fbc515dcc 100644
--- a/src/Specific/solinas64_2e266m3/femul.c
+++ b/src/Specific/solinas64_2e266m3/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x3 * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x3 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x3 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x3 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
-{ uint64_t x25 = (uint64_t) (x24 >> 0x36);
-{ uint64_t x26 = ((uint64_t)x24 & 0x3fffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint64_t x28 = (uint64_t) (x27 >> 0x35);
-{ uint64_t x29 = ((uint64_t)x27 & 0x1fffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint64_t x31 = (uint64_t) (x30 >> 0x35);
-{ uint64_t x32 = ((uint64_t)x30 & 0x1fffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x35);
-{ uint64_t x35 = ((uint64_t)x33 & 0x1fffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x35);
-{ uint64_t x38 = ((uint64_t)x36 & 0x1fffffffffffff);
-{ uint64_t x39 = (x26 + (0x3 * x37));
-{ uint64_t x40 = (x39 >> 0x36);
-{ uint64_t x41 = (x39 & 0x3fffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x35);
-{ uint64_t x44 = (x42 & 0x1fffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x3 * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x3 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x3 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x3 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
+ { uint64_t x25 = (uint64_t) (x24 >> 0x36);
+ { uint64_t x26 = ((uint64_t)x24 & 0x3fffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint64_t x28 = (uint64_t) (x27 >> 0x35);
+ { uint64_t x29 = ((uint64_t)x27 & 0x1fffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint64_t x31 = (uint64_t) (x30 >> 0x35);
+ { uint64_t x32 = ((uint64_t)x30 & 0x1fffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x35);
+ { uint64_t x35 = ((uint64_t)x33 & 0x1fffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x35);
+ { uint64_t x38 = ((uint64_t)x36 & 0x1fffffffffffff);
+ { uint64_t x39 = (x26 + (0x3 * x37));
+ { uint64_t x40 = (x39 >> 0x36);
+ { uint64_t x41 = (x39 & 0x3fffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x35);
+ { uint64_t x44 = (x42 & 0x1fffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e266m3/fesquare.c b/src/Specific/solinas64_2e266m3/fesquare.c
index d6f5c61ec..56f5fdb77 100644
--- a/src/Specific/solinas64_2e266m3/fesquare.c
+++ b/src/Specific/solinas64_2e266m3/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x3 * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
-{ uint64_t x14 = (uint64_t) (x13 >> 0x36);
-{ uint64_t x15 = ((uint64_t)x13 & 0x3fffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint64_t x17 = (uint64_t) (x16 >> 0x35);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x35);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x35);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x35);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
-{ uint64_t x28 = (x15 + (0x3 * x26));
-{ uint64_t x29 = (x28 >> 0x36);
-{ uint64_t x30 = (x28 & 0x3fffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x35);
-{ uint64_t x33 = (x31 & 0x1fffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x3 * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
+ { uint64_t x14 = (uint64_t) (x13 >> 0x36);
+ { uint64_t x15 = ((uint64_t)x13 & 0x3fffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint64_t x17 = (uint64_t) (x16 >> 0x35);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x35);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x35);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x35);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
+ { uint64_t x28 = (x15 + (0x3 * x26));
+ { uint64_t x29 = (x28 >> 0x36);
+ { uint64_t x30 = (x28 & 0x3fffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x35);
+ { uint64_t x33 = (x31 & 0x1fffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e266m3/freeze.c b/src/Specific/solinas64_2e266m3/freeze.c
index 34a56b26e..aeae95dd8 100644
--- a/src/Specific/solinas64_2e266m3/freeze.c
+++ b/src/Specific/solinas64_2e266m3/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffffffffd;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffffffffd);
+ { uint64_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x11, Return x4, 0x1fffffffffffff);
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x14, Return x6, 0x1fffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x8, 0x1fffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x1fffffffffffff);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x3ffffffffffffd);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x1fffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint64_t x33 = (x24 & 0x1fffffffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint64_t x37 = (x24 & 0x1fffffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint64_t x41 = (x24 & 0x1fffffffffffff);
+ { uint64_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x22, Return x41);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e285m9/femul.c b/src/Specific/solinas64_2e285m9/femul.c
index 0a43f11ee..932ed20b5 100644
--- a/src/Specific/solinas64_2e285m9/femul.c
+++ b/src/Specific/solinas64_2e285m9/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x9 * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x9 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x9 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x9 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
-{ uint128_t x25 = (x24 >> 0x39);
-{ uint64_t x26 = ((uint64_t)x24 & 0x1ffffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint128_t x28 = (x27 >> 0x39);
-{ uint64_t x29 = ((uint64_t)x27 & 0x1ffffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint128_t x31 = (x30 >> 0x39);
-{ uint64_t x32 = ((uint64_t)x30 & 0x1ffffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint128_t x34 = (x33 >> 0x39);
-{ uint64_t x35 = ((uint64_t)x33 & 0x1ffffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x39);
-{ uint64_t x38 = ((uint64_t)x36 & 0x1ffffffffffffff);
-{ uint128_t x39 = (x26 + ((uint128_t)0x9 * x37));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x39);
-{ uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x39);
-{ uint64_t x44 = (x42 & 0x1ffffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x9 * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x9 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x9 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x9 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))));
+ { uint128_t x25 = (x24 >> 0x39);
+ { uint64_t x26 = ((uint64_t)x24 & 0x1ffffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint128_t x28 = (x27 >> 0x39);
+ { uint64_t x29 = ((uint64_t)x27 & 0x1ffffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint128_t x31 = (x30 >> 0x39);
+ { uint64_t x32 = ((uint64_t)x30 & 0x1ffffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint128_t x34 = (x33 >> 0x39);
+ { uint64_t x35 = ((uint64_t)x33 & 0x1ffffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x39);
+ { uint64_t x38 = ((uint64_t)x36 & 0x1ffffffffffffff);
+ { uint128_t x39 = (x26 + ((uint128_t)0x9 * x37));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x39);
+ { uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x39);
+ { uint64_t x44 = (x42 & 0x1ffffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e285m9/fesquare.c b/src/Specific/solinas64_2e285m9/fesquare.c
index a15f688d5..2d267f366 100644
--- a/src/Specific/solinas64_2e285m9/fesquare.c
+++ b/src/Specific/solinas64_2e285m9/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x9 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x9 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
-{ uint128_t x14 = (x13 >> 0x39);
-{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint128_t x17 = (x16 >> 0x39);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint128_t x20 = (x19 >> 0x39);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint128_t x23 = (x22 >> 0x39);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x39);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
-{ uint128_t x28 = (x15 + ((uint128_t)0x9 * x26));
-{ uint64_t x29 = (uint64_t) (x28 >> 0x39);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x39);
-{ uint64_t x33 = (x31 & 0x1ffffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x9 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x9 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))));
+ { uint128_t x14 = (x13 >> 0x39);
+ { uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint128_t x17 = (x16 >> 0x39);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint128_t x20 = (x19 >> 0x39);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint128_t x23 = (x22 >> 0x39);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x39);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
+ { uint128_t x28 = (x15 + ((uint128_t)0x9 * x26));
+ { uint64_t x29 = (uint64_t) (x28 >> 0x39);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x39);
+ { uint64_t x33 = (x31 & 0x1ffffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e285m9/freeze.c b/src/Specific/solinas64_2e285m9/freeze.c
index aec265935..00ed78813 100644
--- a/src/Specific/solinas64_2e285m9/freeze.c
+++ b/src/Specific/solinas64_2e285m9/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffffff7;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffffff7);
+ { uint64_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x11, Return x4, 0x1ffffffffffffff);
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x14, Return x6, 0x1ffffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x8, 0x1ffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x1ffffffffffffff);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x1fffffffffffff7);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x1ffffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint64_t x33 = (x24 & 0x1ffffffffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint64_t x37 = (x24 & 0x1ffffffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint64_t x41 = (x24 & 0x1ffffffffffffff);
+ { uint64_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x22, Return x41);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e291m19/femul.c b/src/Specific/solinas64_2e291m19/femul.c
index 7b2ae8b54..aadbb7dc2 100644
--- a/src/Specific/solinas64_2e291m19/femul.c
+++ b/src/Specific/solinas64_2e291m19/femul.c
@@ -1,51 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
-{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
-{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18)));
-{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
-{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
-{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
-{ uint128_t x25 = (x24 >> 0x3b);
-{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffffff);
-{ uint128_t x27 = (x25 + x23);
-{ uint128_t x28 = (x27 >> 0x3a);
-{ uint64_t x29 = ((uint64_t)x27 & 0x3ffffffffffffff);
-{ uint128_t x30 = (x28 + x22);
-{ uint128_t x31 = (x30 >> 0x3a);
-{ uint64_t x32 = ((uint64_t)x30 & 0x3ffffffffffffff);
-{ uint128_t x33 = (x31 + x21);
-{ uint128_t x34 = (x33 >> 0x3a);
-{ uint64_t x35 = ((uint64_t)x33 & 0x3ffffffffffffff);
-{ uint128_t x36 = (x34 + x20);
-{ uint128_t x37 = (x36 >> 0x3a);
-{ uint64_t x38 = ((uint64_t)x36 & 0x3ffffffffffffff);
-{ uint128_t x39 = (x26 + (0x13 * x37));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x3b);
-{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffffff);
-{ uint64_t x42 = (x40 + x29);
-{ uint64_t x43 = (x42 >> 0x3a);
-{ uint64_t x44 = (x42 & 0x3ffffffffffffff);
-out[0] = x38;
-out[1] = x35;
-out[2] = x43 + x32;
-out[3] = x44;
-out[4] = x41;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void femul(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) {
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x18 = in2[4];
+ { const uint64_t x19 = in2[3];
+ { const uint64_t x17 = in2[2];
+ { const uint64_t x15 = in2[1];
+ { const uint64_t x13 = in2[0];
+ { uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13)))));
+ { uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18)));
+ { uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+ { uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+ { uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))));
+ { uint128_t x25 = (x24 >> 0x3b);
+ { uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffffff);
+ { uint128_t x27 = (x25 + x23);
+ { uint128_t x28 = (x27 >> 0x3a);
+ { uint64_t x29 = ((uint64_t)x27 & 0x3ffffffffffffff);
+ { uint128_t x30 = (x28 + x22);
+ { uint128_t x31 = (x30 >> 0x3a);
+ { uint64_t x32 = ((uint64_t)x30 & 0x3ffffffffffffff);
+ { uint128_t x33 = (x31 + x21);
+ { uint128_t x34 = (x33 >> 0x3a);
+ { uint64_t x35 = ((uint64_t)x33 & 0x3ffffffffffffff);
+ { uint128_t x36 = (x34 + x20);
+ { uint128_t x37 = (x36 >> 0x3a);
+ { uint64_t x38 = ((uint64_t)x36 & 0x3ffffffffffffff);
+ { uint128_t x39 = (x26 + (0x13 * x37));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x3b);
+ { uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffffff);
+ { uint64_t x42 = (x40 + x29);
+ { uint64_t x43 = (x42 >> 0x3a);
+ { uint64_t x44 = (x42 & 0x3ffffffffffffff);
+ out[0] = x41;
+ out[1] = x44;
+ out[2] = (x43 + x32);
+ out[3] = x35;
+ out[4] = x38;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e291m19/fesquare.c b/src/Specific/solinas64_2e291m19/fesquare.c
index 2cf8a74f3..781a6ed09 100644
--- a/src/Specific/solinas64_2e291m19/fesquare.c
+++ b/src/Specific/solinas64_2e291m19/fesquare.c
@@ -1,51 +1,38 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
-{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
-{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
-{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
-{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
-{ uint128_t x14 = (x13 >> 0x3b);
-{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffffff);
-{ uint128_t x16 = (x14 + x12);
-{ uint128_t x17 = (x16 >> 0x3a);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
-{ uint128_t x19 = (x17 + x11);
-{ uint128_t x20 = (x19 >> 0x3a);
-{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffffff);
-{ uint128_t x22 = (x20 + x10);
-{ uint128_t x23 = (x22 >> 0x3a);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffffff);
-{ uint128_t x25 = (x23 + x9);
-{ uint128_t x26 = (x25 >> 0x3a);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
-{ uint128_t x28 = (x15 + (0x13 * x26));
-{ uint64_t x29 = (uint64_t) (x28 >> 0x3b);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
-{ uint64_t x31 = (x29 + x18);
-{ uint64_t x32 = (x31 >> 0x3a);
-{ uint64_t x33 = (x31 & 0x3ffffffffffffff);
-out[0] = x27;
-out[1] = x24;
-out[2] = x32 + x21;
-out[3] = x33;
-out[4] = x30;
-}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[5];
+static void fesquare(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2)))));
+ { uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
+ { uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+ { uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+ { uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))));
+ { uint128_t x14 = (x13 >> 0x3b);
+ { uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffffff);
+ { uint128_t x16 = (x14 + x12);
+ { uint128_t x17 = (x16 >> 0x3a);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+ { uint128_t x19 = (x17 + x11);
+ { uint128_t x20 = (x19 >> 0x3a);
+ { uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffffff);
+ { uint128_t x22 = (x20 + x10);
+ { uint128_t x23 = (x22 >> 0x3a);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffffff);
+ { uint128_t x25 = (x23 + x9);
+ { uint128_t x26 = (x25 >> 0x3a);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
+ { uint128_t x28 = (x15 + (0x13 * x26));
+ { uint64_t x29 = (uint64_t) (x28 >> 0x3b);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
+ { uint64_t x31 = (x29 + x18);
+ { uint64_t x32 = (x31 >> 0x3a);
+ { uint64_t x33 = (x31 & 0x3ffffffffffffff);
+ out[0] = x30;
+ out[1] = x33;
+ out[2] = (x32 + x21);
+ out[3] = x24;
+ out[4] = x27;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e291m19/freeze.c b/src/Specific/solinas64_2e291m19/freeze.c
index 14406c6fe..be656508b 100644
--- a/src/Specific/solinas64_2e291m19/freeze.c
+++ b/src/Specific/solinas64_2e291m19/freeze.c
@@ -1,25 +1,29 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x10;
-out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 59 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffffffed;;
+static void freeze(uint64_t out[5], const uint64_t in1[5]) {
+ { const uint64_t x7 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x10, uint8_t x11 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffffffed);
+ { uint64_t x13, uint8_t x14 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x11, Return x4, 0x3ffffffffffffff);
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x14, Return x6, 0x3ffffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x8, 0x3ffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x7, 0x3ffffffffffffff);
+ { uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL);
+ { uint64_t x25 = (x24 & 0x7ffffffffffffed);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x10, Return x25);
+ { uint64_t x29 = (x24 & 0x3ffffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x13, Return x29);
+ { uint64_t x33 = (x24 & 0x3ffffffffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x16, Return x33);
+ { uint64_t x37 = (x24 & 0x3ffffffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x19, Return x37);
+ { uint64_t x41 = (x24 & 0x3ffffffffffffff);
+ { uint64_t x43, uint8_t _ = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x22, Return x41);
+ out[0] = x27;
+ out[1] = x31;
+ out[2] = x35;
+ out[3] = x39;
+ out[4] = x43;
+ }}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e321m9/femul.c b/src/Specific/solinas64_2e321m9/femul.c
index ac91fec15..688d2ca13 100644
--- a/src/Specific/solinas64_2e321m9/femul.c
+++ b/src/Specific/solinas64_2e321m9/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
-{ uint128_t x25 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + (((uint128_t)x9 * x19) + ((0x2 * ((uint128_t)x11 * x17)) + ((uint128_t)x13 * x15))))) + (0x9 * (0x2 * ((uint128_t)x12 * x22))));
-{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x9 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
-{ uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0x9 * ((0x2 * ((uint128_t)x11 * x22)) + (((uint128_t)x13 * x23) + (0x2 * ((uint128_t)x12 * x21))))));
-{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x9 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
-{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x9 * ((0x2 * ((uint128_t)x7 * x22)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + (((uint128_t)x13 * x19) + (0x2 * ((uint128_t)x12 * x17))))))));
-{ uint64_t x30 = (uint64_t) (x29 >> 0x36);
-{ uint64_t x31 = ((uint64_t)x29 & 0x3fffffffffffff);
-{ uint128_t x32 = (x30 + x28);
-{ uint64_t x33 = (uint64_t) (x32 >> 0x35);
-{ uint64_t x34 = ((uint64_t)x32 & 0x1fffffffffffff);
-{ uint128_t x35 = (x33 + x27);
-{ uint64_t x36 = (uint64_t) (x35 >> 0x36);
-{ uint64_t x37 = ((uint64_t)x35 & 0x3fffffffffffff);
-{ uint128_t x38 = (x36 + x26);
-{ uint64_t x39 = (uint64_t) (x38 >> 0x35);
-{ uint64_t x40 = ((uint64_t)x38 & 0x1fffffffffffff);
-{ uint128_t x41 = (x39 + x25);
-{ uint64_t x42 = (uint64_t) (x41 >> 0x36);
-{ uint64_t x43 = ((uint64_t)x41 & 0x3fffffffffffff);
-{ uint128_t x44 = (x42 + x24);
-{ uint64_t x45 = (uint64_t) (x44 >> 0x35);
-{ uint64_t x46 = ((uint64_t)x44 & 0x1fffffffffffff);
-{ uint64_t x47 = (x31 + (0x9 * x45));
-{ uint64_t x48 = (x47 >> 0x36);
-{ uint64_t x49 = (x47 & 0x3fffffffffffff);
-{ uint64_t x50 = (x48 + x34);
-{ uint64_t x51 = (x50 >> 0x35);
-{ uint64_t x52 = (x50 & 0x1fffffffffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
+ { uint128_t x25 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + (((uint128_t)x9 * x19) + ((0x2 * ((uint128_t)x11 * x17)) + ((uint128_t)x13 * x15))))) + (0x9 * (0x2 * ((uint128_t)x12 * x22))));
+ { uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x9 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+ { uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0x9 * ((0x2 * ((uint128_t)x11 * x22)) + (((uint128_t)x13 * x23) + (0x2 * ((uint128_t)x12 * x21))))));
+ { uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x9 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+ { uint128_t x29 = (((uint128_t)x5 * x15) + (0x9 * ((0x2 * ((uint128_t)x7 * x22)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + (((uint128_t)x13 * x19) + (0x2 * ((uint128_t)x12 * x17))))))));
+ { uint64_t x30 = (uint64_t) (x29 >> 0x36);
+ { uint64_t x31 = ((uint64_t)x29 & 0x3fffffffffffff);
+ { uint128_t x32 = (x30 + x28);
+ { uint64_t x33 = (uint64_t) (x32 >> 0x35);
+ { uint64_t x34 = ((uint64_t)x32 & 0x1fffffffffffff);
+ { uint128_t x35 = (x33 + x27);
+ { uint64_t x36 = (uint64_t) (x35 >> 0x36);
+ { uint64_t x37 = ((uint64_t)x35 & 0x3fffffffffffff);
+ { uint128_t x38 = (x36 + x26);
+ { uint64_t x39 = (uint64_t) (x38 >> 0x35);
+ { uint64_t x40 = ((uint64_t)x38 & 0x1fffffffffffff);
+ { uint128_t x41 = (x39 + x25);
+ { uint64_t x42 = (uint64_t) (x41 >> 0x36);
+ { uint64_t x43 = ((uint64_t)x41 & 0x3fffffffffffff);
+ { uint128_t x44 = (x42 + x24);
+ { uint64_t x45 = (uint64_t) (x44 >> 0x35);
+ { uint64_t x46 = ((uint64_t)x44 & 0x1fffffffffffff);
+ { uint64_t x47 = (x31 + (0x9 * x45));
+ { uint64_t x48 = (x47 >> 0x36);
+ { uint64_t x49 = (x47 & 0x3fffffffffffff);
+ { uint64_t x50 = (x48 + x34);
+ { uint64_t x51 = (x50 >> 0x35);
+ { uint64_t x52 = (x50 & 0x1fffffffffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e321m9/fesquare.c b/src/Specific/solinas64_2e321m9/fesquare.c
index 8312f2988..b2b14b5c2 100644
--- a/src/Specific/solinas64_2e321m9/fesquare.c
+++ b/src/Specific/solinas64_2e321m9/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
-{ uint128_t x12 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x9 * (0x2 * ((uint128_t)x9 * x9))));
-{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
-{ uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * ((0x2 * ((uint128_t)x8 * x9)) + (((uint128_t)x10 * x10) + (0x2 * ((uint128_t)x9 * x8))))));
-{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
-{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x9)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + (0x2 * ((uint128_t)x9 * x4))))))));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x36);
-{ uint64_t x18 = ((uint64_t)x16 & 0x3fffffffffffff);
-{ uint128_t x19 = (x17 + x15);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x35);
-{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
-{ uint128_t x22 = (x20 + x14);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x36);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffffffff);
-{ uint128_t x25 = (x23 + x13);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x35);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
-{ uint128_t x28 = (x26 + x12);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x36);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
-{ uint128_t x31 = (x29 + x11);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x35);
-{ uint64_t x33 = ((uint64_t)x31 & 0x1fffffffffffff);
-{ uint64_t x34 = (x18 + (0x9 * x32));
-{ uint64_t x35 = (x34 >> 0x36);
-{ uint64_t x36 = (x34 & 0x3fffffffffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x35);
-{ uint64_t x39 = (x37 & 0x1fffffffffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
+ { uint128_t x12 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x9 * (0x2 * ((uint128_t)x9 * x9))));
+ { uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+ { uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * ((0x2 * ((uint128_t)x8 * x9)) + (((uint128_t)x10 * x10) + (0x2 * ((uint128_t)x9 * x8))))));
+ { uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+ { uint128_t x16 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x9)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + (0x2 * ((uint128_t)x9 * x4))))))));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x36);
+ { uint64_t x18 = ((uint64_t)x16 & 0x3fffffffffffff);
+ { uint128_t x19 = (x17 + x15);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x35);
+ { uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+ { uint128_t x22 = (x20 + x14);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x36);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3fffffffffffff);
+ { uint128_t x25 = (x23 + x13);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x35);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
+ { uint128_t x28 = (x26 + x12);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x36);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
+ { uint128_t x31 = (x29 + x11);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x35);
+ { uint64_t x33 = ((uint64_t)x31 & 0x1fffffffffffff);
+ { uint64_t x34 = (x18 + (0x9 * x32));
+ { uint64_t x35 = (x34 >> 0x36);
+ { uint64_t x36 = (x34 & 0x3fffffffffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x35);
+ { uint64_t x39 = (x37 & 0x1fffffffffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e321m9/freeze.c b/src/Specific/solinas64_2e321m9/freeze.c
index c20a7e799..5378a290e 100644
--- a/src/Specific/solinas64_2e321m9/freeze.c
+++ b/src/Specific/solinas64_2e321m9/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3ffffffffffff7;;
+static void freeze(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3ffffffffffff7);
+ { uint64_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x13, Return x4, 0x1fffffffffffff);
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x16, Return x6, 0x3fffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x8, 0x1fffffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x10, 0x3fffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x9, 0x1fffffffffffff);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0x3ffffffffffff7);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint64_t x34 = (x29 & 0x1fffffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint64_t x38 = (x29 & 0x3fffffffffffff);
+ { uint64_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint64_t x42 = (x29 & 0x1fffffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint64_t x46 = (x29 & 0x3fffffffffffff);
+ { uint64_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint64_t x50 = (x29 & 0x1fffffffffffff);
+ { uint64_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e322m2e161m1/freeze.c b/src/Specific/solinas64_2e322m2e161m1/freeze.c
index 71d073624..7361e059f 100644
--- a/src/Specific/solinas64_2e322m2e161m1/freeze.c
+++ b/src/Specific/solinas64_2e322m2e161m1/freeze.c
@@ -1,25 +1,39 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x14;
-out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 46 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffffff;;
+static void freeze(uint64_t out[7], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffffff);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x4, 0x3fffffffffff);
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x6, 0x3fffffffffff);
+ { uint64_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x21, Return x8, 0x3fffff7fffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x24, Return x10, 0x3fffffffffff);
+ { uint64_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x12, 0x3fffffffffff);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x30, Return x11, 0x3fffffffffff);
+ { uint64_t x34 = (uint64_t)cmovznz(x33, 0x0, 0xffffffffffffffffL);
+ { uint64_t x35 = (x34 & 0x3fffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x14, Return x35);
+ { uint64_t x39 = (x34 & 0x3fffffffffff);
+ { uint64_t x41, uint8_t x42 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x38, Return x17, Return x39);
+ { uint64_t x43 = (x34 & 0x3fffffffffff);
+ { uint64_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x20, Return x43);
+ { uint64_t x47 = (x34 & 0x3fffff7fffff);
+ { uint64_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x46, Return x23, Return x47);
+ { uint64_t x51 = (x34 & 0x3fffffffffff);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x50, Return x26, Return x51);
+ { uint64_t x55 = (x34 & 0x3fffffffffff);
+ { uint64_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x54, Return x29, Return x55);
+ { uint64_t x59 = (x34 & 0x3fffffffffff);
+ { uint64_t x61, uint8_t _ = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x58, Return x32, Return x59);
+ out[0] = x37;
+ out[1] = x41;
+ out[2] = x45;
+ out[3] = x49;
+ out[4] = x53;
+ out[5] = x57;
+ out[6] = x61;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e336m17/femul.c b/src/Specific/solinas64_2e336m17/femul.c
index d1bdecab7..9e663e79f 100644
--- a/src/Specific/solinas64_2e336m17/femul.c
+++ b/src/Specific/solinas64_2e336m17/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
-{ uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0x11 * ((uint128_t)x12 * x22)));
-{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x11 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
-{ uint128_t x27 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + ((uint128_t)x9 * x15))) + (0x11 * (((uint128_t)x11 * x22) + (((uint128_t)x13 * x23) + ((uint128_t)x12 * x21)))));
-{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x11 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
-{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x11 * (((uint128_t)x7 * x22) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x12 * x17)))))));
-{ uint128_t x30 = (x29 >> 0x38);
-{ uint64_t x31 = ((uint64_t)x29 & 0xffffffffffffff);
-{ uint128_t x32 = (x30 + x28);
-{ uint128_t x33 = (x32 >> 0x38);
-{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
-{ uint128_t x35 = (x33 + x27);
-{ uint128_t x36 = (x35 >> 0x38);
-{ uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
-{ uint128_t x38 = (x36 + x26);
-{ uint128_t x39 = (x38 >> 0x38);
-{ uint64_t x40 = ((uint64_t)x38 & 0xffffffffffffff);
-{ uint128_t x41 = (x39 + x25);
-{ uint64_t x42 = (uint64_t) (x41 >> 0x38);
-{ uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
-{ uint128_t x44 = (x42 + x24);
-{ uint64_t x45 = (uint64_t) (x44 >> 0x38);
-{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
-{ uint128_t x47 = (x31 + ((uint128_t)0x11 * x45));
-{ uint64_t x48 = (uint64_t) (x47 >> 0x38);
-{ uint64_t x49 = ((uint64_t)x47 & 0xffffffffffffff);
-{ uint64_t x50 = (x48 + x34);
-{ uint64_t x51 = (x50 >> 0x38);
-{ uint64_t x52 = (x50 & 0xffffffffffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
+ { uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0x11 * ((uint128_t)x12 * x22)));
+ { uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x11 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+ { uint128_t x27 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + ((uint128_t)x9 * x15))) + (0x11 * (((uint128_t)x11 * x22) + (((uint128_t)x13 * x23) + ((uint128_t)x12 * x21)))));
+ { uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x11 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+ { uint128_t x29 = (((uint128_t)x5 * x15) + (0x11 * (((uint128_t)x7 * x22) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x12 * x17)))))));
+ { uint128_t x30 = (x29 >> 0x38);
+ { uint64_t x31 = ((uint64_t)x29 & 0xffffffffffffff);
+ { uint128_t x32 = (x30 + x28);
+ { uint128_t x33 = (x32 >> 0x38);
+ { uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
+ { uint128_t x35 = (x33 + x27);
+ { uint128_t x36 = (x35 >> 0x38);
+ { uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
+ { uint128_t x38 = (x36 + x26);
+ { uint128_t x39 = (x38 >> 0x38);
+ { uint64_t x40 = ((uint64_t)x38 & 0xffffffffffffff);
+ { uint128_t x41 = (x39 + x25);
+ { uint64_t x42 = (uint64_t) (x41 >> 0x38);
+ { uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
+ { uint128_t x44 = (x42 + x24);
+ { uint64_t x45 = (uint64_t) (x44 >> 0x38);
+ { uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
+ { uint128_t x47 = (x31 + ((uint128_t)0x11 * x45));
+ { uint64_t x48 = (uint64_t) (x47 >> 0x38);
+ { uint64_t x49 = ((uint64_t)x47 & 0xffffffffffffff);
+ { uint64_t x50 = (x48 + x34);
+ { uint64_t x51 = (x50 >> 0x38);
+ { uint64_t x52 = (x50 & 0xffffffffffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e336m17/fesquare.c b/src/Specific/solinas64_2e336m17/fesquare.c
index 7f903cd78..86e650785 100644
--- a/src/Specific/solinas64_2e336m17/fesquare.c
+++ b/src/Specific/solinas64_2e336m17/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
-{ uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x11 * ((uint128_t)x9 * x9)));
-{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
-{ uint128_t x14 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)x8 * x9) + (((uint128_t)x10 * x10) + ((uint128_t)x9 * x8)))));
-{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
-{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x9) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + ((uint128_t)x9 * x4)))))));
-{ uint128_t x17 = (x16 >> 0x38);
-{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
-{ uint128_t x19 = (x17 + x15);
-{ uint128_t x20 = (x19 >> 0x38);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
-{ uint128_t x22 = (x20 + x14);
-{ uint128_t x23 = (x22 >> 0x38);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
-{ uint128_t x25 = (x23 + x13);
-{ uint128_t x26 = (x25 >> 0x38);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
-{ uint128_t x28 = (x26 + x12);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
-{ uint128_t x31 = (x29 + x11);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
-{ uint128_t x34 = (x18 + ((uint128_t)0x11 * x32));
-{ uint64_t x35 = (uint64_t) (x34 >> 0x38);
-{ uint64_t x36 = ((uint64_t)x34 & 0xffffffffffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x38);
-{ uint64_t x39 = (x37 & 0xffffffffffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
+ { uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x11 * ((uint128_t)x9 * x9)));
+ { uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+ { uint128_t x14 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)x8 * x9) + (((uint128_t)x10 * x10) + ((uint128_t)x9 * x8)))));
+ { uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+ { uint128_t x16 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x9) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + ((uint128_t)x9 * x4)))))));
+ { uint128_t x17 = (x16 >> 0x38);
+ { uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
+ { uint128_t x19 = (x17 + x15);
+ { uint128_t x20 = (x19 >> 0x38);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+ { uint128_t x22 = (x20 + x14);
+ { uint128_t x23 = (x22 >> 0x38);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+ { uint128_t x25 = (x23 + x13);
+ { uint128_t x26 = (x25 >> 0x38);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+ { uint128_t x28 = (x26 + x12);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+ { uint128_t x31 = (x29 + x11);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x38);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+ { uint128_t x34 = (x18 + ((uint128_t)0x11 * x32));
+ { uint64_t x35 = (uint64_t) (x34 >> 0x38);
+ { uint64_t x36 = ((uint64_t)x34 & 0xffffffffffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x38);
+ { uint64_t x39 = (x37 & 0xffffffffffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e336m17/freeze.c b/src/Specific/solinas64_2e336m17/freeze.c
index 7f9c2afb6..2ea95eb06 100644
--- a/src/Specific/solinas64_2e336m17/freeze.c
+++ b/src/Specific/solinas64_2e336m17/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffffef;;
+static void freeze(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffffef);
+ { uint64_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x13, Return x4, 0xffffffffffffff);
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x16, Return x6, 0xffffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x8, 0xffffffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x10, 0xffffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x9, 0xffffffffffffff);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xffffffffffffef);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint64_t x34 = (x29 & 0xffffffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint64_t x38 = (x29 & 0xffffffffffffff);
+ { uint64_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint64_t x42 = (x29 & 0xffffffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint64_t x46 = (x29 & 0xffffffffffffff);
+ { uint64_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint64_t x50 = (x29 & 0xffffffffffffff);
+ { uint64_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e336m3/femul.c b/src/Specific/solinas64_2e336m3/femul.c
index f0bc0c880..879ec974c 100644
--- a/src/Specific/solinas64_2e336m3/femul.c
+++ b/src/Specific/solinas64_2e336m3/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
-{ uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0x3 * ((uint128_t)x12 * x22)));
-{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x3 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
-{ uint128_t x27 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + ((uint128_t)x9 * x15))) + (0x3 * (((uint128_t)x11 * x22) + (((uint128_t)x13 * x23) + ((uint128_t)x12 * x21)))));
-{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x3 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
-{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x3 * (((uint128_t)x7 * x22) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x12 * x17)))))));
-{ uint64_t x30 = (uint64_t) (x29 >> 0x38);
-{ uint64_t x31 = ((uint64_t)x29 & 0xffffffffffffff);
-{ uint128_t x32 = (x30 + x28);
-{ uint64_t x33 = (uint64_t) (x32 >> 0x38);
-{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
-{ uint128_t x35 = (x33 + x27);
-{ uint64_t x36 = (uint64_t) (x35 >> 0x38);
-{ uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
-{ uint128_t x38 = (x36 + x26);
-{ uint64_t x39 = (uint64_t) (x38 >> 0x38);
-{ uint64_t x40 = ((uint64_t)x38 & 0xffffffffffffff);
-{ uint128_t x41 = (x39 + x25);
-{ uint64_t x42 = (uint64_t) (x41 >> 0x38);
-{ uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
-{ uint128_t x44 = (x42 + x24);
-{ uint64_t x45 = (uint64_t) (x44 >> 0x38);
-{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
-{ uint64_t x47 = (x31 + (0x3 * x45));
-{ uint64_t x48 = (x47 >> 0x38);
-{ uint64_t x49 = (x47 & 0xffffffffffffff);
-{ uint64_t x50 = (x48 + x34);
-{ uint64_t x51 = (x50 >> 0x38);
-{ uint64_t x52 = (x50 & 0xffffffffffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15))))));
+ { uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0x3 * ((uint128_t)x12 * x22)));
+ { uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x3 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+ { uint128_t x27 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + ((uint128_t)x9 * x15))) + (0x3 * (((uint128_t)x11 * x22) + (((uint128_t)x13 * x23) + ((uint128_t)x12 * x21)))));
+ { uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x3 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+ { uint128_t x29 = (((uint128_t)x5 * x15) + (0x3 * (((uint128_t)x7 * x22) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x12 * x17)))))));
+ { uint64_t x30 = (uint64_t) (x29 >> 0x38);
+ { uint64_t x31 = ((uint64_t)x29 & 0xffffffffffffff);
+ { uint128_t x32 = (x30 + x28);
+ { uint64_t x33 = (uint64_t) (x32 >> 0x38);
+ { uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
+ { uint128_t x35 = (x33 + x27);
+ { uint64_t x36 = (uint64_t) (x35 >> 0x38);
+ { uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
+ { uint128_t x38 = (x36 + x26);
+ { uint64_t x39 = (uint64_t) (x38 >> 0x38);
+ { uint64_t x40 = ((uint64_t)x38 & 0xffffffffffffff);
+ { uint128_t x41 = (x39 + x25);
+ { uint64_t x42 = (uint64_t) (x41 >> 0x38);
+ { uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
+ { uint128_t x44 = (x42 + x24);
+ { uint64_t x45 = (uint64_t) (x44 >> 0x38);
+ { uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
+ { uint64_t x47 = (x31 + (0x3 * x45));
+ { uint64_t x48 = (x47 >> 0x38);
+ { uint64_t x49 = (x47 & 0xffffffffffffff);
+ { uint64_t x50 = (x48 + x34);
+ { uint64_t x51 = (x50 >> 0x38);
+ { uint64_t x52 = (x50 & 0xffffffffffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e336m3/fesquare.c b/src/Specific/solinas64_2e336m3/fesquare.c
index 2a515d77c..5ccf65c0c 100644
--- a/src/Specific/solinas64_2e336m3/fesquare.c
+++ b/src/Specific/solinas64_2e336m3/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
-{ uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x3 * ((uint128_t)x9 * x9)));
-{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x3 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
-{ uint128_t x14 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x3 * (((uint128_t)x8 * x9) + (((uint128_t)x10 * x10) + ((uint128_t)x9 * x8)))));
-{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
-{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x9) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + ((uint128_t)x9 * x4)))))));
-{ uint64_t x17 = (uint64_t) (x16 >> 0x38);
-{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
-{ uint128_t x19 = (x17 + x15);
-{ uint64_t x20 = (uint64_t) (x19 >> 0x38);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
-{ uint128_t x22 = (x20 + x14);
-{ uint64_t x23 = (uint64_t) (x22 >> 0x38);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
-{ uint128_t x25 = (x23 + x13);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x38);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
-{ uint128_t x28 = (x26 + x12);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
-{ uint128_t x31 = (x29 + x11);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
-{ uint64_t x34 = (x18 + (0x3 * x32));
-{ uint64_t x35 = (x34 >> 0x38);
-{ uint64_t x36 = (x34 & 0xffffffffffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x38);
-{ uint64_t x39 = (x37 & 0xffffffffffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2))))));
+ { uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x3 * ((uint128_t)x9 * x9)));
+ { uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x3 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+ { uint128_t x14 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x3 * (((uint128_t)x8 * x9) + (((uint128_t)x10 * x10) + ((uint128_t)x9 * x8)))));
+ { uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+ { uint128_t x16 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x9) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + ((uint128_t)x9 * x4)))))));
+ { uint64_t x17 = (uint64_t) (x16 >> 0x38);
+ { uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
+ { uint128_t x19 = (x17 + x15);
+ { uint64_t x20 = (uint64_t) (x19 >> 0x38);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+ { uint128_t x22 = (x20 + x14);
+ { uint64_t x23 = (uint64_t) (x22 >> 0x38);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+ { uint128_t x25 = (x23 + x13);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x38);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+ { uint128_t x28 = (x26 + x12);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+ { uint128_t x31 = (x29 + x11);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x38);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+ { uint64_t x34 = (x18 + (0x3 * x32));
+ { uint64_t x35 = (x34 >> 0x38);
+ { uint64_t x36 = (x34 & 0xffffffffffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x38);
+ { uint64_t x39 = (x37 & 0xffffffffffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e336m3/freeze.c b/src/Specific/solinas64_2e336m3/freeze.c
index 000e7a80c..c42f18310 100644
--- a/src/Specific/solinas64_2e336m3/freeze.c
+++ b/src/Specific/solinas64_2e336m3/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffffd;;
+static void freeze(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffffd);
+ { uint64_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x13, Return x4, 0xffffffffffffff);
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x16, Return x6, 0xffffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x8, 0xffffffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x10, 0xffffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x9, 0xffffffffffffff);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0xfffffffffffffd);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint64_t x34 = (x29 & 0xffffffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint64_t x38 = (x29 & 0xffffffffffffff);
+ { uint64_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint64_t x42 = (x29 & 0xffffffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint64_t x46 = (x29 & 0xffffffffffffff);
+ { uint64_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint64_t x50 = (x29 & 0xffffffffffffff);
+ { uint64_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e338m15/femul.c b/src/Specific/solinas64_2e338m15/femul.c
index 91d8d237e..b1b0c9dbc 100644
--- a/src/Specific/solinas64_2e338m15/femul.c
+++ b/src/Specific/solinas64_2e338m15/femul.c
@@ -1,56 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
-{ uint128_t x24 = (((uint128_t)x5 * x22) + ((0x2 * ((uint128_t)x7 * x23)) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + ((0x2 * ((uint128_t)x13 * x17)) + ((uint128_t)x12 * x15))))));
-{ uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0xf * ((uint128_t)x12 * x22)));
-{ uint128_t x26 = ((((uint128_t)x5 * x21) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((uint128_t)x11 * x15)))) + (0xf * ((0x2 * ((uint128_t)x13 * x22)) + (0x2 * ((uint128_t)x12 * x23)))));
-{ uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0xf * (((uint128_t)x11 * x22) + ((0x2 * ((uint128_t)x13 * x23)) + ((uint128_t)x12 * x21)))));
-{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0xf * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
-{ uint128_t x29 = (((uint128_t)x5 * x15) + (0xf * ((0x2 * ((uint128_t)x7 * x22)) + ((0x2 * ((uint128_t)x9 * x23)) + (((uint128_t)x11 * x21) + ((0x2 * ((uint128_t)x13 * x19)) + (0x2 * ((uint128_t)x12 * x17))))))));
-{ uint128_t x30 = (x29 >> 0x39);
-{ uint64_t x31 = ((uint64_t)x29 & 0x1ffffffffffffff);
-{ uint128_t x32 = (x30 + x28);
-{ uint128_t x33 = (x32 >> 0x38);
-{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
-{ uint128_t x35 = (x33 + x27);
-{ uint128_t x36 = (x35 >> 0x38);
-{ uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
-{ uint128_t x38 = (x36 + x26);
-{ uint128_t x39 = (x38 >> 0x39);
-{ uint64_t x40 = ((uint64_t)x38 & 0x1ffffffffffffff);
-{ uint128_t x41 = (x39 + x25);
-{ uint128_t x42 = (x41 >> 0x38);
-{ uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
-{ uint128_t x44 = (x42 + x24);
-{ uint64_t x45 = (uint64_t) (x44 >> 0x38);
-{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
-{ uint128_t x47 = (x31 + ((uint128_t)0xf * x45));
-{ uint64_t x48 = (uint64_t) (x47 >> 0x39);
-{ uint64_t x49 = ((uint64_t)x47 & 0x1ffffffffffffff);
-{ uint64_t x50 = (x48 + x34);
-{ uint64_t x51 = (x50 >> 0x38);
-{ uint64_t x52 = (x50 & 0xffffffffffffff);
-out[0] = x46;
-out[1] = x43;
-out[2] = x40;
-out[3] = x51 + x37;
-out[4] = x52;
-out[5] = x49;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void femul(uint64_t out[6], const uint64_t in1[6], const uint64_t in2[6]) {
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x22 = in2[5];
+ { const uint64_t x23 = in2[4];
+ { const uint64_t x21 = in2[3];
+ { const uint64_t x19 = in2[2];
+ { const uint64_t x17 = in2[1];
+ { const uint64_t x15 = in2[0];
+ { uint128_t x24 = (((uint128_t)x5 * x22) + ((0x2 * ((uint128_t)x7 * x23)) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + ((0x2 * ((uint128_t)x13 * x17)) + ((uint128_t)x12 * x15))))));
+ { uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0xf * ((uint128_t)x12 * x22)));
+ { uint128_t x26 = ((((uint128_t)x5 * x21) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((uint128_t)x11 * x15)))) + (0xf * ((0x2 * ((uint128_t)x13 * x22)) + (0x2 * ((uint128_t)x12 * x23)))));
+ { uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0xf * (((uint128_t)x11 * x22) + ((0x2 * ((uint128_t)x13 * x23)) + ((uint128_t)x12 * x21)))));
+ { uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0xf * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+ { uint128_t x29 = (((uint128_t)x5 * x15) + (0xf * ((0x2 * ((uint128_t)x7 * x22)) + ((0x2 * ((uint128_t)x9 * x23)) + (((uint128_t)x11 * x21) + ((0x2 * ((uint128_t)x13 * x19)) + (0x2 * ((uint128_t)x12 * x17))))))));
+ { uint128_t x30 = (x29 >> 0x39);
+ { uint64_t x31 = ((uint64_t)x29 & 0x1ffffffffffffff);
+ { uint128_t x32 = (x30 + x28);
+ { uint128_t x33 = (x32 >> 0x38);
+ { uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
+ { uint128_t x35 = (x33 + x27);
+ { uint128_t x36 = (x35 >> 0x38);
+ { uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
+ { uint128_t x38 = (x36 + x26);
+ { uint128_t x39 = (x38 >> 0x39);
+ { uint64_t x40 = ((uint64_t)x38 & 0x1ffffffffffffff);
+ { uint128_t x41 = (x39 + x25);
+ { uint128_t x42 = (x41 >> 0x38);
+ { uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
+ { uint128_t x44 = (x42 + x24);
+ { uint64_t x45 = (uint64_t) (x44 >> 0x38);
+ { uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
+ { uint128_t x47 = (x31 + ((uint128_t)0xf * x45));
+ { uint64_t x48 = (uint64_t) (x47 >> 0x39);
+ { uint64_t x49 = ((uint64_t)x47 & 0x1ffffffffffffff);
+ { uint64_t x50 = (x48 + x34);
+ { uint64_t x51 = (x50 >> 0x38);
+ { uint64_t x52 = (x50 & 0xffffffffffffff);
+ out[0] = x49;
+ out[1] = x52;
+ out[2] = (x51 + x37);
+ out[3] = x40;
+ out[4] = x43;
+ out[5] = x46;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e338m15/fesquare.c b/src/Specific/solinas64_2e338m15/fesquare.c
index 67017c61d..0c19e81c2 100644
--- a/src/Specific/solinas64_2e338m15/fesquare.c
+++ b/src/Specific/solinas64_2e338m15/fesquare.c
@@ -1,56 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x11 = (((uint128_t)x2 * x9) + ((0x2 * ((uint128_t)x4 * x10)) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x9 * x2))))));
-{ uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0xf * ((uint128_t)x9 * x9)));
-{ uint128_t x13 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xf * ((0x2 * ((uint128_t)x10 * x9)) + (0x2 * ((uint128_t)x9 * x10)))));
-{ uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xf * (((uint128_t)x8 * x9) + ((0x2 * ((uint128_t)x10 * x10)) + ((uint128_t)x9 * x8)))));
-{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
-{ uint128_t x16 = (((uint128_t)x2 * x2) + (0xf * ((0x2 * ((uint128_t)x4 * x9)) + ((0x2 * ((uint128_t)x6 * x10)) + (((uint128_t)x8 * x8) + ((0x2 * ((uint128_t)x10 * x6)) + (0x2 * ((uint128_t)x9 * x4))))))));
-{ uint128_t x17 = (x16 >> 0x39);
-{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
-{ uint128_t x19 = (x17 + x15);
-{ uint128_t x20 = (x19 >> 0x38);
-{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
-{ uint128_t x22 = (x20 + x14);
-{ uint128_t x23 = (x22 >> 0x38);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
-{ uint128_t x25 = (x23 + x13);
-{ uint128_t x26 = (x25 >> 0x39);
-{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
-{ uint128_t x28 = (x26 + x12);
-{ uint128_t x29 = (x28 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
-{ uint128_t x31 = (x29 + x11);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
-{ uint128_t x34 = (x18 + ((uint128_t)0xf * x32));
-{ uint64_t x35 = (uint64_t) (x34 >> 0x39);
-{ uint64_t x36 = ((uint64_t)x34 & 0x1ffffffffffffff);
-{ uint64_t x37 = (x35 + x21);
-{ uint64_t x38 = (x37 >> 0x38);
-{ uint64_t x39 = (x37 & 0xffffffffffffff);
-out[0] = x33;
-out[1] = x30;
-out[2] = x27;
-out[3] = x38 + x24;
-out[4] = x39;
-out[5] = x36;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[6];
+static void fesquare(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x11 = (((uint128_t)x2 * x9) + ((0x2 * ((uint128_t)x4 * x10)) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x9 * x2))))));
+ { uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0xf * ((uint128_t)x9 * x9)));
+ { uint128_t x13 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xf * ((0x2 * ((uint128_t)x10 * x9)) + (0x2 * ((uint128_t)x9 * x10)))));
+ { uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xf * (((uint128_t)x8 * x9) + ((0x2 * ((uint128_t)x10 * x10)) + ((uint128_t)x9 * x8)))));
+ { uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+ { uint128_t x16 = (((uint128_t)x2 * x2) + (0xf * ((0x2 * ((uint128_t)x4 * x9)) + ((0x2 * ((uint128_t)x6 * x10)) + (((uint128_t)x8 * x8) + ((0x2 * ((uint128_t)x10 * x6)) + (0x2 * ((uint128_t)x9 * x4))))))));
+ { uint128_t x17 = (x16 >> 0x39);
+ { uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+ { uint128_t x19 = (x17 + x15);
+ { uint128_t x20 = (x19 >> 0x38);
+ { uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+ { uint128_t x22 = (x20 + x14);
+ { uint128_t x23 = (x22 >> 0x38);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+ { uint128_t x25 = (x23 + x13);
+ { uint128_t x26 = (x25 >> 0x39);
+ { uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
+ { uint128_t x28 = (x26 + x12);
+ { uint128_t x29 = (x28 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+ { uint128_t x31 = (x29 + x11);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x38);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+ { uint128_t x34 = (x18 + ((uint128_t)0xf * x32));
+ { uint64_t x35 = (uint64_t) (x34 >> 0x39);
+ { uint64_t x36 = ((uint64_t)x34 & 0x1ffffffffffffff);
+ { uint64_t x37 = (x35 + x21);
+ { uint64_t x38 = (x37 >> 0x38);
+ { uint64_t x39 = (x37 & 0xffffffffffffff);
+ out[0] = x36;
+ out[1] = x39;
+ out[2] = (x38 + x24);
+ out[3] = x27;
+ out[4] = x30;
+ out[5] = x33;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e338m15/freeze.c b/src/Specific/solinas64_2e338m15/freeze.c
index 4f7156067..3a6a3e217 100644
--- a/src/Specific/solinas64_2e338m15/freeze.c
+++ b/src/Specific/solinas64_2e338m15/freeze.c
@@ -1,25 +1,34 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x12;
-out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffffff1;;
+static void freeze(uint64_t out[6], const uint64_t in1[6]) {
+ { const uint64_t x9 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x12, uint8_t x13 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffffff1);
+ { uint64_t x15, uint8_t x16 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x13, Return x4, 0xffffffffffffff);
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x16, Return x6, 0xffffffffffffff);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x8, 0x1ffffffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x10, 0xffffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x9, 0xffffffffffffff);
+ { uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);
+ { uint64_t x30 = (x29 & 0x1fffffffffffff1);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x12, Return x30);
+ { uint64_t x34 = (x29 & 0xffffffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x15, Return x34);
+ { uint64_t x38 = (x29 & 0xffffffffffffff);
+ { uint64_t x40, uint8_t x41 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x18, Return x38);
+ { uint64_t x42 = (x29 & 0x1ffffffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x41, Return x21, Return x42);
+ { uint64_t x46 = (x29 & 0xffffffffffffff);
+ { uint64_t x48, uint8_t x49 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x24, Return x46);
+ { uint64_t x50 = (x29 & 0xffffffffffffff);
+ { uint64_t x52, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x49, Return x27, Return x50);
+ out[0] = x32;
+ out[1] = x36;
+ out[2] = x40;
+ out[3] = x44;
+ out[4] = x48;
+ out[5] = x52;
+ }}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e369m25/femul.c b/src/Specific/solinas64_2e369m25/femul.c
index d58b08e29..5c6fbded1 100644
--- a/src/Specific/solinas64_2e369m25/femul.c
+++ b/src/Specific/solinas64_2e369m25/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)x5 * x30) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((0x2 * ((uint128_t)x17 * x21)) + ((uint128_t)x16 * x19))))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x19 * ((uint128_t)x16 * x30)));
-{ uint128_t x34 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((uint128_t)x15 * x19)))))) + (0x19 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
-{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x19 * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
-{ uint128_t x36 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (0x19 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x19 * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
-{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x19 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
-{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x19 * ((0x2 * ((uint128_t)x7 * x30)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((0x2 * ((uint128_t)x17 * x23)) + (0x2 * ((uint128_t)x16 * x21))))))))));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x2f);
-{ uint64_t x41 = ((uint64_t)x39 & 0x7fffffffffff);
-{ uint128_t x42 = (x40 + x38);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x2e);
-{ uint64_t x44 = ((uint64_t)x42 & 0x3fffffffffff);
-{ uint128_t x45 = (x43 + x37);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x2e);
-{ uint64_t x47 = ((uint64_t)x45 & 0x3fffffffffff);
-{ uint128_t x48 = (x46 + x36);
-{ uint64_t x49 = (uint64_t) (x48 >> 0x2e);
-{ uint64_t x50 = ((uint64_t)x48 & 0x3fffffffffff);
-{ uint128_t x51 = (x49 + x35);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x2e);
-{ uint64_t x53 = ((uint64_t)x51 & 0x3fffffffffff);
-{ uint128_t x54 = (x52 + x34);
-{ uint64_t x55 = (uint64_t) (x54 >> 0x2e);
-{ uint64_t x56 = ((uint64_t)x54 & 0x3fffffffffff);
-{ uint128_t x57 = (x55 + x33);
-{ uint64_t x58 = (uint64_t) (x57 >> 0x2e);
-{ uint64_t x59 = ((uint64_t)x57 & 0x3fffffffffff);
-{ uint128_t x60 = (x58 + x32);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x2e);
-{ uint64_t x62 = ((uint64_t)x60 & 0x3fffffffffff);
-{ uint64_t x63 = (x41 + (0x19 * x61));
-{ uint64_t x64 = (x63 >> 0x2f);
-{ uint64_t x65 = (x63 & 0x7fffffffffff);
-{ uint64_t x66 = (x64 + x44);
-{ uint64_t x67 = (x66 >> 0x2e);
-{ uint64_t x68 = (x66 & 0x3fffffffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)x5 * x30) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((0x2 * ((uint128_t)x17 * x21)) + ((uint128_t)x16 * x19))))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x19 * ((uint128_t)x16 * x30)));
+ { uint128_t x34 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((uint128_t)x15 * x19)))))) + (0x19 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+ { uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x19 * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
+ { uint128_t x36 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (0x19 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x19 * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
+ { uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x19 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+ { uint128_t x39 = (((uint128_t)x5 * x19) + (0x19 * ((0x2 * ((uint128_t)x7 * x30)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((0x2 * ((uint128_t)x17 * x23)) + (0x2 * ((uint128_t)x16 * x21))))))))));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x2f);
+ { uint64_t x41 = ((uint64_t)x39 & 0x7fffffffffff);
+ { uint128_t x42 = (x40 + x38);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x2e);
+ { uint64_t x44 = ((uint64_t)x42 & 0x3fffffffffff);
+ { uint128_t x45 = (x43 + x37);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x2e);
+ { uint64_t x47 = ((uint64_t)x45 & 0x3fffffffffff);
+ { uint128_t x48 = (x46 + x36);
+ { uint64_t x49 = (uint64_t) (x48 >> 0x2e);
+ { uint64_t x50 = ((uint64_t)x48 & 0x3fffffffffff);
+ { uint128_t x51 = (x49 + x35);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x2e);
+ { uint64_t x53 = ((uint64_t)x51 & 0x3fffffffffff);
+ { uint128_t x54 = (x52 + x34);
+ { uint64_t x55 = (uint64_t) (x54 >> 0x2e);
+ { uint64_t x56 = ((uint64_t)x54 & 0x3fffffffffff);
+ { uint128_t x57 = (x55 + x33);
+ { uint64_t x58 = (uint64_t) (x57 >> 0x2e);
+ { uint64_t x59 = ((uint64_t)x57 & 0x3fffffffffff);
+ { uint128_t x60 = (x58 + x32);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x2e);
+ { uint64_t x62 = ((uint64_t)x60 & 0x3fffffffffff);
+ { uint64_t x63 = (x41 + (0x19 * x61));
+ { uint64_t x64 = (x63 >> 0x2f);
+ { uint64_t x65 = (x63 & 0x7fffffffffff);
+ { uint64_t x66 = (x64 + x44);
+ { uint64_t x67 = (x66 >> 0x2e);
+ { uint64_t x68 = (x66 & 0x3fffffffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e369m25/fesquare.c b/src/Specific/solinas64_2e369m25/fesquare.c
index e4fdbf115..af582164e 100644
--- a/src/Specific/solinas64_2e369m25/fesquare.c
+++ b/src/Specific/solinas64_2e369m25/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)x2 * x13) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x13 * x2))))))));
-{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x19 * ((uint128_t)x13 * x13)));
-{ uint128_t x17 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x19 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
-{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x19 * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
-{ uint128_t x19 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x19 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x19 * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
-{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
-{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x19 * ((0x2 * ((uint128_t)x4 * x13)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + (0x2 * ((uint128_t)x13 * x4))))))))));
-{ uint64_t x23 = (uint64_t) (x22 >> 0x2f);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
-{ uint128_t x25 = (x23 + x21);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x2e);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffff);
-{ uint128_t x28 = (x26 + x20);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x2e);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffff);
-{ uint128_t x31 = (x29 + x19);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x2e);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffff);
-{ uint128_t x34 = (x32 + x18);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x2e);
-{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffff);
-{ uint128_t x37 = (x35 + x17);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x2e);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffff);
-{ uint128_t x40 = (x38 + x16);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x2e);
-{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffff);
-{ uint128_t x43 = (x41 + x15);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x2e);
-{ uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffff);
-{ uint64_t x46 = (x24 + (0x19 * x44));
-{ uint64_t x47 = (x46 >> 0x2f);
-{ uint64_t x48 = (x46 & 0x7fffffffffff);
-{ uint64_t x49 = (x47 + x27);
-{ uint64_t x50 = (x49 >> 0x2e);
-{ uint64_t x51 = (x49 & 0x3fffffffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)x2 * x13) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x13 * x2))))))));
+ { uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x19 * ((uint128_t)x13 * x13)));
+ { uint128_t x17 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x19 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+ { uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x19 * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
+ { uint128_t x19 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x19 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x19 * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
+ { uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+ { uint128_t x22 = (((uint128_t)x2 * x2) + (0x19 * ((0x2 * ((uint128_t)x4 * x13)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + (0x2 * ((uint128_t)x13 * x4))))))))));
+ { uint64_t x23 = (uint64_t) (x22 >> 0x2f);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
+ { uint128_t x25 = (x23 + x21);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x2e);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffff);
+ { uint128_t x28 = (x26 + x20);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x2e);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffff);
+ { uint128_t x31 = (x29 + x19);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x2e);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffff);
+ { uint128_t x34 = (x32 + x18);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x2e);
+ { uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffff);
+ { uint128_t x37 = (x35 + x17);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x2e);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffff);
+ { uint128_t x40 = (x38 + x16);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x2e);
+ { uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffff);
+ { uint128_t x43 = (x41 + x15);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x2e);
+ { uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffff);
+ { uint64_t x46 = (x24 + (0x19 * x44));
+ { uint64_t x47 = (x46 >> 0x2f);
+ { uint64_t x48 = (x46 & 0x7fffffffffff);
+ { uint64_t x49 = (x47 + x27);
+ { uint64_t x50 = (x49 >> 0x2e);
+ { uint64_t x51 = (x49 & 0x3fffffffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e369m25/freeze.c b/src/Specific/solinas64_2e369m25/freeze.c
index 780892147..3a9c4c13b 100644
--- a/src/Specific/solinas64_2e369m25/freeze.c
+++ b/src/Specific/solinas64_2e369m25/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 47 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffffffe7;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffffffe7);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0x3fffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0x3fffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0x3fffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0x3fffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0x3fffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0x3fffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0x3fffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0x7fffffffffe7);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 47 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0x3fffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0x3fffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0x3fffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0x3fffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0x3fffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0x3fffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0x3fffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e379m19/femul.c b/src/Specific/solinas64_2e379m19/femul.c
index 47cf1b36d..13915c820 100644
--- a/src/Specific/solinas64_2e379m19/femul.c
+++ b/src/Specific/solinas64_2e379m19/femul.c
@@ -1,61 +1,57 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint128_t x28 = (((uint128_t)x5 * x26) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((0x2 * ((uint128_t)x15 * x19)) + ((uint128_t)x14 * x17)))))));
-{ uint128_t x29 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((0x2 * ((uint128_t)x13 * x19)) + ((uint128_t)x15 * x17)))))) + (0x13 * ((uint128_t)x14 * x26)));
-{ uint128_t x30 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((0x2 * ((uint128_t)x11 * x19)) + ((uint128_t)x13 * x17))))) + (0x13 * (((uint128_t)x15 * x26) + ((uint128_t)x14 * x27))));
-{ uint128_t x31 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((0x2 * ((uint128_t)x9 * x19)) + ((uint128_t)x11 * x17)))) + (0x13 * (((uint128_t)x13 * x26) + (((uint128_t)x15 * x27) + ((uint128_t)x14 * x25)))));
-{ uint128_t x32 = ((((uint128_t)x5 * x21) + ((0x2 * ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17))) + (0x13 * (((uint128_t)x11 * x26) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x14 * x23))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x19) + ((uint128_t)x7 * x17)) + (0x13 * (((uint128_t)x9 * x26) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x14 * x21)))))));
-{ uint128_t x34 = (((uint128_t)x5 * x17) + (0x13 * ((0x2 * ((uint128_t)x7 * x26)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + (0x2 * ((uint128_t)x14 * x19)))))))));
-{ uint128_t x35 = (x34 >> 0x37);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
-{ uint128_t x37 = (x35 + x33);
-{ uint128_t x38 = (x37 >> 0x36);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
-{ uint128_t x40 = (x38 + x32);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x36);
-{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffffff);
-{ uint128_t x43 = (x41 + x31);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x36);
-{ uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffffff);
-{ uint128_t x46 = (x44 + x30);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x36);
-{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffffff);
-{ uint128_t x49 = (x47 + x29);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x36);
-{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffffff);
-{ uint128_t x52 = (x50 + x28);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x36);
-{ uint64_t x54 = ((uint64_t)x52 & 0x3fffffffffffff);
-{ uint128_t x55 = (x36 + ((uint128_t)0x13 * x53));
-{ uint64_t x56 = (uint64_t) (x55 >> 0x37);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7fffffffffffff);
-{ uint64_t x58 = (x56 + x39);
-{ uint64_t x59 = (x58 >> 0x36);
-{ uint64_t x60 = (x58 & 0x3fffffffffffff);
-out[0] = x54;
-out[1] = x51;
-out[2] = x48;
-out[3] = x45;
-out[4] = x59 + x42;
-out[5] = x60;
-out[6] = x57;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void femul(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint128_t x28 = (((uint128_t)x5 * x26) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((0x2 * ((uint128_t)x15 * x19)) + ((uint128_t)x14 * x17)))))));
+ { uint128_t x29 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((0x2 * ((uint128_t)x13 * x19)) + ((uint128_t)x15 * x17)))))) + (0x13 * ((uint128_t)x14 * x26)));
+ { uint128_t x30 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((0x2 * ((uint128_t)x11 * x19)) + ((uint128_t)x13 * x17))))) + (0x13 * (((uint128_t)x15 * x26) + ((uint128_t)x14 * x27))));
+ { uint128_t x31 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((0x2 * ((uint128_t)x9 * x19)) + ((uint128_t)x11 * x17)))) + (0x13 * (((uint128_t)x13 * x26) + (((uint128_t)x15 * x27) + ((uint128_t)x14 * x25)))));
+ { uint128_t x32 = ((((uint128_t)x5 * x21) + ((0x2 * ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17))) + (0x13 * (((uint128_t)x11 * x26) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x14 * x23))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x19) + ((uint128_t)x7 * x17)) + (0x13 * (((uint128_t)x9 * x26) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x14 * x21)))))));
+ { uint128_t x34 = (((uint128_t)x5 * x17) + (0x13 * ((0x2 * ((uint128_t)x7 * x26)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + (0x2 * ((uint128_t)x14 * x19)))))))));
+ { uint128_t x35 = (x34 >> 0x37);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
+ { uint128_t x37 = (x35 + x33);
+ { uint128_t x38 = (x37 >> 0x36);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
+ { uint128_t x40 = (x38 + x32);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x36);
+ { uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffffff);
+ { uint128_t x43 = (x41 + x31);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x36);
+ { uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffffff);
+ { uint128_t x46 = (x44 + x30);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x36);
+ { uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffffff);
+ { uint128_t x49 = (x47 + x29);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x36);
+ { uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffffff);
+ { uint128_t x52 = (x50 + x28);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x36);
+ { uint64_t x54 = ((uint64_t)x52 & 0x3fffffffffffff);
+ { uint128_t x55 = (x36 + ((uint128_t)0x13 * x53));
+ { uint64_t x56 = (uint64_t) (x55 >> 0x37);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7fffffffffffff);
+ { uint64_t x58 = (x56 + x39);
+ { uint64_t x59 = (x58 >> 0x36);
+ { uint64_t x60 = (x58 & 0x3fffffffffffff);
+ out[0] = x57;
+ out[1] = x60;
+ out[2] = (x59 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e379m19/fesquare.c b/src/Specific/solinas64_2e379m19/fesquare.c
index cfe4b4a05..e2ddf49ac 100644
--- a/src/Specific/solinas64_2e379m19/fesquare.c
+++ b/src/Specific/solinas64_2e379m19/fesquare.c
@@ -1,61 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x13 = (((uint128_t)x2 * x11) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x11 * x2)))))));
-{ uint128_t x14 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x13 * ((uint128_t)x11 * x11)));
-{ uint128_t x15 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x13 * (((uint128_t)x12 * x11) + ((uint128_t)x11 * x12))));
-{ uint128_t x16 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * (((uint128_t)x10 * x11) + (((uint128_t)x12 * x12) + ((uint128_t)x11 * x10)))));
-{ uint128_t x17 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x11) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((uint128_t)x11 * x8))))));
-{ uint128_t x18 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x11) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((uint128_t)x11 * x6)))))));
-{ uint128_t x19 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x11)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + (0x2 * ((uint128_t)x11 * x4)))))))));
-{ uint128_t x20 = (x19 >> 0x37);
-{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
-{ uint128_t x22 = (x20 + x18);
-{ uint128_t x23 = (x22 >> 0x36);
-{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x36);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x36);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
-{ uint128_t x31 = (x29 + x15);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x36);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffffff);
-{ uint128_t x34 = (x32 + x14);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x36);
-{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffffff);
-{ uint128_t x37 = (x35 + x13);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x36);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
-{ uint128_t x40 = (x21 + ((uint128_t)0x13 * x38));
-{ uint64_t x41 = (uint64_t) (x40 >> 0x37);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7fffffffffffff);
-{ uint64_t x43 = (x41 + x24);
-{ uint64_t x44 = (x43 >> 0x36);
-{ uint64_t x45 = (x43 & 0x3fffffffffffff);
-out[0] = x39;
-out[1] = x36;
-out[2] = x33;
-out[3] = x30;
-out[4] = x44 + x27;
-out[5] = x45;
-out[6] = x42;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void fesquare(uint64_t out[7], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x13 = (((uint128_t)x2 * x11) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x11 * x2)))))));
+ { uint128_t x14 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x13 * ((uint128_t)x11 * x11)));
+ { uint128_t x15 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x13 * (((uint128_t)x12 * x11) + ((uint128_t)x11 * x12))));
+ { uint128_t x16 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * (((uint128_t)x10 * x11) + (((uint128_t)x12 * x12) + ((uint128_t)x11 * x10)))));
+ { uint128_t x17 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x11) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((uint128_t)x11 * x8))))));
+ { uint128_t x18 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x11) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((uint128_t)x11 * x6)))))));
+ { uint128_t x19 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x11)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + (0x2 * ((uint128_t)x11 * x4)))))))));
+ { uint128_t x20 = (x19 >> 0x37);
+ { uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
+ { uint128_t x22 = (x20 + x18);
+ { uint128_t x23 = (x22 >> 0x36);
+ { uint64_t x24 = ((uint64_t)x22 & 0x3fffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x36);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x36);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
+ { uint128_t x31 = (x29 + x15);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x36);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffffff);
+ { uint128_t x34 = (x32 + x14);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x36);
+ { uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffffff);
+ { uint128_t x37 = (x35 + x13);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x36);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
+ { uint128_t x40 = (x21 + ((uint128_t)0x13 * x38));
+ { uint64_t x41 = (uint64_t) (x40 >> 0x37);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7fffffffffffff);
+ { uint64_t x43 = (x41 + x24);
+ { uint64_t x44 = (x43 >> 0x36);
+ { uint64_t x45 = (x43 & 0x3fffffffffffff);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = (x44 + x27);
+ out[3] = x30;
+ out[4] = x33;
+ out[5] = x36;
+ out[6] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e379m19/freeze.c b/src/Specific/solinas64_2e379m19/freeze.c
index 32b8dc691..0e60a39c6 100644
--- a/src/Specific/solinas64_2e379m19/freeze.c
+++ b/src/Specific/solinas64_2e379m19/freeze.c
@@ -1,25 +1,39 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x14;
-out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffffffffed;;
+static void freeze(uint64_t out[7], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffffffffed);
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x4, 0x3fffffffffffff);
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x6, 0x3fffffffffffff);
+ { uint64_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x21, Return x8, 0x3fffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x24, Return x10, 0x3fffffffffffff);
+ { uint64_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x12, 0x3fffffffffffff);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x30, Return x11, 0x3fffffffffffff);
+ { uint64_t x34 = (uint64_t)cmovznz(x33, 0x0, 0xffffffffffffffffL);
+ { uint64_t x35 = (x34 & 0x7fffffffffffed);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x14, Return x35);
+ { uint64_t x39 = (x34 & 0x3fffffffffffff);
+ { uint64_t x41, uint8_t x42 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x38, Return x17, Return x39);
+ { uint64_t x43 = (x34 & 0x3fffffffffffff);
+ { uint64_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x20, Return x43);
+ { uint64_t x47 = (x34 & 0x3fffffffffffff);
+ { uint64_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x46, Return x23, Return x47);
+ { uint64_t x51 = (x34 & 0x3fffffffffffff);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x50, Return x26, Return x51);
+ { uint64_t x55 = (x34 & 0x3fffffffffffff);
+ { uint64_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x54, Return x29, Return x55);
+ { uint64_t x59 = (x34 & 0x3fffffffffffff);
+ { uint64_t x61, uint8_t _ = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x58, Return x32, Return x59);
+ out[0] = x37;
+ out[1] = x41;
+ out[2] = x45;
+ out[3] = x49;
+ out[4] = x53;
+ out[5] = x57;
+ out[6] = x61;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e382m105/femul.c b/src/Specific/solinas64_2e382m105/femul.c
index f187d84a1..79692f2a0 100644
--- a/src/Specific/solinas64_2e382m105/femul.c
+++ b/src/Specific/solinas64_2e382m105/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x69 * ((uint128_t)x20 * x38)));
-{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x69 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
-{ uint128_t x43 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x17 * x23))))))) + (0x69 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
-{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x69 * ((0x2 * ((uint128_t)x17 * x38)) + ((0x2 * ((uint128_t)x19 * x39)) + ((0x2 * ((uint128_t)x21 * x37)) + (0x2 * ((uint128_t)x20 * x35)))))));
-{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x69 * (((uint128_t)x15 * x38) + ((0x2 * ((uint128_t)x17 * x39)) + ((0x2 * ((uint128_t)x19 * x37)) + ((0x2 * ((uint128_t)x21 * x35)) + ((uint128_t)x20 * x33)))))));
-{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x69 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + ((0x2 * ((uint128_t)x17 * x37)) + ((0x2 * ((uint128_t)x19 * x35)) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
-{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x69 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + ((0x2 * ((uint128_t)x17 * x35)) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
-{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x69 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
-{ uint128_t x49 = (((uint128_t)x5 * x23) + (0x69 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
-{ uint64_t x50 = (uint64_t) (x49 >> 0x27);
-{ uint64_t x51 = ((uint64_t)x49 & 0x7fffffffff);
-{ uint128_t x52 = (x50 + x48);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x26);
-{ uint64_t x54 = ((uint64_t)x52 & 0x3fffffffff);
-{ uint128_t x55 = (x53 + x47);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x26);
-{ uint64_t x57 = ((uint64_t)x55 & 0x3fffffffff);
-{ uint128_t x58 = (x56 + x46);
-{ uint64_t x59 = (uint64_t) (x58 >> 0x26);
-{ uint64_t x60 = ((uint64_t)x58 & 0x3fffffffff);
-{ uint128_t x61 = (x59 + x45);
-{ uint64_t x62 = (uint64_t) (x61 >> 0x26);
-{ uint64_t x63 = ((uint64_t)x61 & 0x3fffffffff);
-{ uint128_t x64 = (x62 + x44);
-{ uint64_t x65 = (uint64_t) (x64 >> 0x27);
-{ uint64_t x66 = ((uint64_t)x64 & 0x7fffffffff);
-{ uint128_t x67 = (x65 + x43);
-{ uint64_t x68 = (uint64_t) (x67 >> 0x26);
-{ uint64_t x69 = ((uint64_t)x67 & 0x3fffffffff);
-{ uint128_t x70 = (x68 + x42);
-{ uint64_t x71 = (uint64_t) (x70 >> 0x26);
-{ uint64_t x72 = ((uint64_t)x70 & 0x3fffffffff);
-{ uint128_t x73 = (x71 + x41);
-{ uint64_t x74 = (uint64_t) (x73 >> 0x26);
-{ uint64_t x75 = ((uint64_t)x73 & 0x3fffffffff);
-{ uint128_t x76 = (x74 + x40);
-{ uint64_t x77 = (uint64_t) (x76 >> 0x26);
-{ uint64_t x78 = ((uint64_t)x76 & 0x3fffffffff);
-{ uint64_t x79 = (x51 + (0x69 * x77));
-{ uint64_t x80 = (x79 >> 0x27);
-{ uint64_t x81 = (x79 & 0x7fffffffff);
-{ uint64_t x82 = (x80 + x54);
-{ uint64_t x83 = (x82 >> 0x26);
-{ uint64_t x84 = (x82 & 0x3fffffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint64_t out[10], const uint64_t in1[10], const uint64_t in2[10]) {
+ { const uint64_t x20 = in1[9];
+ { const uint64_t x21 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x38 = in2[9];
+ { const uint64_t x39 = in2[8];
+ { const uint64_t x37 = in2[7];
+ { const uint64_t x35 = in2[6];
+ { const uint64_t x33 = in2[5];
+ { const uint64_t x31 = in2[4];
+ { const uint64_t x29 = in2[3];
+ { const uint64_t x27 = in2[2];
+ { const uint64_t x25 = in2[1];
+ { const uint64_t x23 = in2[0];
+ { uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x69 * ((uint128_t)x20 * x38)));
+ { uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x69 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+ { uint128_t x43 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x17 * x23))))))) + (0x69 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+ { uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x69 * ((0x2 * ((uint128_t)x17 * x38)) + ((0x2 * ((uint128_t)x19 * x39)) + ((0x2 * ((uint128_t)x21 * x37)) + (0x2 * ((uint128_t)x20 * x35)))))));
+ { uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x69 * (((uint128_t)x15 * x38) + ((0x2 * ((uint128_t)x17 * x39)) + ((0x2 * ((uint128_t)x19 * x37)) + ((0x2 * ((uint128_t)x21 * x35)) + ((uint128_t)x20 * x33)))))));
+ { uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x69 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + ((0x2 * ((uint128_t)x17 * x37)) + ((0x2 * ((uint128_t)x19 * x35)) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+ { uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x69 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + ((0x2 * ((uint128_t)x17 * x35)) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+ { uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x69 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+ { uint128_t x49 = (((uint128_t)x5 * x23) + (0x69 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
+ { uint64_t x50 = (uint64_t) (x49 >> 0x27);
+ { uint64_t x51 = ((uint64_t)x49 & 0x7fffffffff);
+ { uint128_t x52 = (x50 + x48);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x26);
+ { uint64_t x54 = ((uint64_t)x52 & 0x3fffffffff);
+ { uint128_t x55 = (x53 + x47);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x26);
+ { uint64_t x57 = ((uint64_t)x55 & 0x3fffffffff);
+ { uint128_t x58 = (x56 + x46);
+ { uint64_t x59 = (uint64_t) (x58 >> 0x26);
+ { uint64_t x60 = ((uint64_t)x58 & 0x3fffffffff);
+ { uint128_t x61 = (x59 + x45);
+ { uint64_t x62 = (uint64_t) (x61 >> 0x26);
+ { uint64_t x63 = ((uint64_t)x61 & 0x3fffffffff);
+ { uint128_t x64 = (x62 + x44);
+ { uint64_t x65 = (uint64_t) (x64 >> 0x27);
+ { uint64_t x66 = ((uint64_t)x64 & 0x7fffffffff);
+ { uint128_t x67 = (x65 + x43);
+ { uint64_t x68 = (uint64_t) (x67 >> 0x26);
+ { uint64_t x69 = ((uint64_t)x67 & 0x3fffffffff);
+ { uint128_t x70 = (x68 + x42);
+ { uint64_t x71 = (uint64_t) (x70 >> 0x26);
+ { uint64_t x72 = ((uint64_t)x70 & 0x3fffffffff);
+ { uint128_t x73 = (x71 + x41);
+ { uint64_t x74 = (uint64_t) (x73 >> 0x26);
+ { uint64_t x75 = ((uint64_t)x73 & 0x3fffffffff);
+ { uint128_t x76 = (x74 + x40);
+ { uint64_t x77 = (uint64_t) (x76 >> 0x26);
+ { uint64_t x78 = ((uint64_t)x76 & 0x3fffffffff);
+ { uint64_t x79 = (x51 + (0x69 * x77));
+ { uint64_t x80 = (x79 >> 0x27);
+ { uint64_t x81 = (x79 & 0x7fffffffff);
+ { uint64_t x82 = (x80 + x54);
+ { uint64_t x83 = (x82 >> 0x26);
+ { uint64_t x84 = (x82 & 0x3fffffffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e382m105/fesquare.c b/src/Specific/solinas64_2e382m105/fesquare.c
index fc560c74b..d88749b03 100644
--- a/src/Specific/solinas64_2e382m105/fesquare.c
+++ b/src/Specific/solinas64_2e382m105/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x69 * ((uint128_t)x17 * x17)));
-{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x69 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
-{ uint128_t x22 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x69 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
-{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x69 * ((0x2 * ((uint128_t)x14 * x17)) + ((0x2 * ((uint128_t)x16 * x18)) + ((0x2 * ((uint128_t)x18 * x16)) + (0x2 * ((uint128_t)x17 * x14)))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x69 * (((uint128_t)x12 * x17) + ((0x2 * ((uint128_t)x14 * x18)) + ((0x2 * ((uint128_t)x16 * x16)) + ((0x2 * ((uint128_t)x18 * x14)) + ((uint128_t)x17 * x12)))))));
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x69 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + ((0x2 * ((uint128_t)x14 * x16)) + ((0x2 * ((uint128_t)x16 * x14)) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
-{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x69 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + ((0x2 * ((uint128_t)x14 * x14)) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
-{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x69 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
-{ uint128_t x28 = (((uint128_t)x2 * x2) + (0x69 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + (((uint128_t)x12 * x12) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
-{ uint64_t x29 = (uint64_t) (x28 >> 0x27);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffff);
-{ uint128_t x31 = (x29 + x27);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x26);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffff);
-{ uint128_t x34 = (x32 + x26);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x26);
-{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffff);
-{ uint128_t x37 = (x35 + x25);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x26);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffff);
-{ uint128_t x40 = (x38 + x24);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x26);
-{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffff);
-{ uint128_t x43 = (x41 + x23);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x27);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffff);
-{ uint128_t x46 = (x44 + x22);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x26);
-{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffff);
-{ uint128_t x49 = (x47 + x21);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x26);
-{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffff);
-{ uint128_t x52 = (x50 + x20);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x26);
-{ uint64_t x54 = ((uint64_t)x52 & 0x3fffffffff);
-{ uint128_t x55 = (x53 + x19);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x26);
-{ uint64_t x57 = ((uint64_t)x55 & 0x3fffffffff);
-{ uint64_t x58 = (x30 + (0x69 * x56));
-{ uint64_t x59 = (x58 >> 0x27);
-{ uint64_t x60 = (x58 & 0x7fffffffff);
-{ uint64_t x61 = (x59 + x33);
-{ uint64_t x62 = (x61 >> 0x26);
-{ uint64_t x63 = (x61 & 0x3fffffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x69 * ((uint128_t)x17 * x17)));
+ { uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x69 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+ { uint128_t x22 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x69 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+ { uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x69 * ((0x2 * ((uint128_t)x14 * x17)) + ((0x2 * ((uint128_t)x16 * x18)) + ((0x2 * ((uint128_t)x18 * x16)) + (0x2 * ((uint128_t)x17 * x14)))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x69 * (((uint128_t)x12 * x17) + ((0x2 * ((uint128_t)x14 * x18)) + ((0x2 * ((uint128_t)x16 * x16)) + ((0x2 * ((uint128_t)x18 * x14)) + ((uint128_t)x17 * x12)))))));
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x69 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + ((0x2 * ((uint128_t)x14 * x16)) + ((0x2 * ((uint128_t)x16 * x14)) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+ { uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x69 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + ((0x2 * ((uint128_t)x14 * x14)) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+ { uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x69 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+ { uint128_t x28 = (((uint128_t)x2 * x2) + (0x69 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + (((uint128_t)x12 * x12) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
+ { uint64_t x29 = (uint64_t) (x28 >> 0x27);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffff);
+ { uint128_t x31 = (x29 + x27);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x26);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3fffffffff);
+ { uint128_t x34 = (x32 + x26);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x26);
+ { uint64_t x36 = ((uint64_t)x34 & 0x3fffffffff);
+ { uint128_t x37 = (x35 + x25);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x26);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3fffffffff);
+ { uint128_t x40 = (x38 + x24);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x26);
+ { uint64_t x42 = ((uint64_t)x40 & 0x3fffffffff);
+ { uint128_t x43 = (x41 + x23);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x27);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7fffffffff);
+ { uint128_t x46 = (x44 + x22);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x26);
+ { uint64_t x48 = ((uint64_t)x46 & 0x3fffffffff);
+ { uint128_t x49 = (x47 + x21);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x26);
+ { uint64_t x51 = ((uint64_t)x49 & 0x3fffffffff);
+ { uint128_t x52 = (x50 + x20);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x26);
+ { uint64_t x54 = ((uint64_t)x52 & 0x3fffffffff);
+ { uint128_t x55 = (x53 + x19);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x26);
+ { uint64_t x57 = ((uint64_t)x55 & 0x3fffffffff);
+ { uint64_t x58 = (x30 + (0x69 * x56));
+ { uint64_t x59 = (x58 >> 0x27);
+ { uint64_t x60 = (x58 & 0x7fffffffff);
+ { uint64_t x61 = (x59 + x33);
+ { uint64_t x62 = (x61 >> 0x26);
+ { uint64_t x63 = (x61 & 0x3fffffffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e382m105/freeze.c b/src/Specific/solinas64_2e382m105/freeze.c
index df882f8dc..78db27697 100644
--- a/src/Specific/solinas64_2e382m105/freeze.c
+++ b/src/Specific/solinas64_2e382m105/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 39 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffff97;;
+static void freeze(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffff97);
+ { uint64_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x21, Return x4, 0x3fffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x24, Return x6, 0x3fffffffff);
+ { uint64_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x8, 0x3fffffffff);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x30, Return x10, 0x3fffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x12, 0x7fffffffff);
+ { uint64_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x14, 0x3fffffffff);
+ { uint64_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x39, Return x16, 0x3fffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x18, 0x3fffffffff);
+ { uint64_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x17, 0x3fffffffff);
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL);
+ { uint64_t x50 = (x49 & 0x7fffffff97);
+ { uint64_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint64_t x54 = (x49 & 0x3fffffffff);
+ { uint64_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint64_t x58 = (x49 & 0x3fffffffff);
+ { uint64_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint64_t x62 = (x49 & 0x3fffffffff);
+ { uint64_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint64_t x66 = (x49 & 0x3fffffffff);
+ { uint64_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint64_t x70 = (x49 & 0x7fffffffff);
+ { uint64_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 39 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint64_t x74 = (x49 & 0x3fffffffff);
+ { uint64_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint64_t x78 = (x49 & 0x3fffffffff);
+ { uint64_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint64_t x82 = (x49 & 0x3fffffffff);
+ { uint64_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint64_t x86 = (x49 & 0x3fffffffff);
+ { uint64_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 38 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x85, Return x47, Return x86);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c b/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c
index d43fdd216..1c04c8171 100644
--- a/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c
+++ b/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c
@@ -1,25 +1,43 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = ℤ x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TZ 0x0;
-out[2] = x2;
-out[3] = 0xffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, ℤ x17 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) Syntax.TZ) (0x0, Return x2, 0xffffffff);
+ { uint64_t x19, ℤ x20 = Op (Syntax.SubWithGetBorrow 48 Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) Syntax.TZ) (Return x17, Return x4, 0x0);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 48 Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xfffeffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, 0x0);
+ { uint64_t x47 = (x39 & 0xfffeffffffff);
+ { uint64_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x46, Return x22, Return x47);
+ { uint64_t x51 = (x39 & 0xffffffffffff);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x50, Return x25, Return x51);
+ { uint64_t x55 = (x39 & 0xffffffffffff);
+ { uint64_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x54, Return x28, Return x55);
+ { uint64_t x59 = (x39 & 0xffffffffffff);
+ { uint64_t x61, uint8_t x62 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x58, Return x31, Return x59);
+ { uint64_t x63 = (x39 & 0xffffffffffff);
+ { uint64_t x65, uint8_t x66 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x62, Return x34, Return x63);
+ { uint64_t x67 = (x39 & 0xffffffffffff);
+ { uint64_t x69, uint8_t _ = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x66, Return x37, Return x67);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = x49;
+ out[3] = x53;
+ out[4] = x57;
+ out[5] = x61;
+ out[6] = x65;
+ out[7] = x69;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e384m317/femul.c b/src/Specific/solinas64_2e384m317/femul.c
index 69a256342..a28b9a126 100644
--- a/src/Specific/solinas64_2e384m317/femul.c
+++ b/src/Specific/solinas64_2e384m317/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + (((uint128_t)x15 * x21) + ((uint128_t)x17 * x19))))))) + (0x13d * ((uint128_t)x16 * x30)));
-{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x13d * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
-{ uint128_t x35 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + ((uint128_t)x13 * x19))))) + (0x13d * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
-{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x13d * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (0x13d * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
-{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x13d * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
-{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x13d * (((uint128_t)x7 * x30) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x16 * x21)))))))));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x30);
-{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffff);
-{ uint128_t x42 = (x40 + x38);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x30);
-{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffff);
-{ uint128_t x45 = (x43 + x37);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x30);
-{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffff);
-{ uint128_t x48 = (x46 + x36);
-{ uint64_t x49 = (uint64_t) (x48 >> 0x30);
-{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffff);
-{ uint128_t x51 = (x49 + x35);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x30);
-{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffff);
-{ uint128_t x54 = (x52 + x34);
-{ uint64_t x55 = (uint64_t) (x54 >> 0x30);
-{ uint64_t x56 = ((uint64_t)x54 & 0xffffffffffff);
-{ uint128_t x57 = (x55 + x33);
-{ uint64_t x58 = (uint64_t) (x57 >> 0x30);
-{ uint64_t x59 = ((uint64_t)x57 & 0xffffffffffff);
-{ uint128_t x60 = (x58 + x32);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x30);
-{ uint64_t x62 = ((uint64_t)x60 & 0xffffffffffff);
-{ uint64_t x63 = (x41 + (0x13d * x61));
-{ uint64_t x64 = (x63 >> 0x30);
-{ uint64_t x65 = (x63 & 0xffffffffffff);
-{ uint64_t x66 = (x64 + x44);
-{ uint64_t x67 = (x66 >> 0x30);
-{ uint64_t x68 = (x66 & 0xffffffffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + (((uint128_t)x15 * x21) + ((uint128_t)x17 * x19))))))) + (0x13d * ((uint128_t)x16 * x30)));
+ { uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x13d * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+ { uint128_t x35 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + ((uint128_t)x13 * x19))))) + (0x13d * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
+ { uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x13d * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (0x13d * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
+ { uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x13d * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+ { uint128_t x39 = (((uint128_t)x5 * x19) + (0x13d * (((uint128_t)x7 * x30) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x16 * x21)))))))));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x30);
+ { uint64_t x41 = ((uint64_t)x39 & 0xffffffffffff);
+ { uint128_t x42 = (x40 + x38);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x30);
+ { uint64_t x44 = ((uint64_t)x42 & 0xffffffffffff);
+ { uint128_t x45 = (x43 + x37);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x30);
+ { uint64_t x47 = ((uint64_t)x45 & 0xffffffffffff);
+ { uint128_t x48 = (x46 + x36);
+ { uint64_t x49 = (uint64_t) (x48 >> 0x30);
+ { uint64_t x50 = ((uint64_t)x48 & 0xffffffffffff);
+ { uint128_t x51 = (x49 + x35);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x30);
+ { uint64_t x53 = ((uint64_t)x51 & 0xffffffffffff);
+ { uint128_t x54 = (x52 + x34);
+ { uint64_t x55 = (uint64_t) (x54 >> 0x30);
+ { uint64_t x56 = ((uint64_t)x54 & 0xffffffffffff);
+ { uint128_t x57 = (x55 + x33);
+ { uint64_t x58 = (uint64_t) (x57 >> 0x30);
+ { uint64_t x59 = ((uint64_t)x57 & 0xffffffffffff);
+ { uint128_t x60 = (x58 + x32);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x30);
+ { uint64_t x62 = ((uint64_t)x60 & 0xffffffffffff);
+ { uint64_t x63 = (x41 + (0x13d * x61));
+ { uint64_t x64 = (x63 >> 0x30);
+ { uint64_t x65 = (x63 & 0xffffffffffff);
+ { uint64_t x66 = (x64 + x44);
+ { uint64_t x67 = (x66 >> 0x30);
+ { uint64_t x68 = (x66 & 0xffffffffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e384m317/fesquare.c b/src/Specific/solinas64_2e384m317/fesquare.c
index 0bb54dc0d..87644b008 100644
--- a/src/Specific/solinas64_2e384m317/fesquare.c
+++ b/src/Specific/solinas64_2e384m317/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
-{ uint128_t x16 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x13d * ((uint128_t)x13 * x13)));
-{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x13d * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
-{ uint128_t x18 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x13d * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
-{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x13d * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x13d * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
-{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13d * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
-{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x13d * (((uint128_t)x4 * x13) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + ((uint128_t)x13 * x4)))))))));
-{ uint64_t x23 = (uint64_t) (x22 >> 0x30);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffff);
-{ uint128_t x25 = (x23 + x21);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x30);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffff);
-{ uint128_t x28 = (x26 + x20);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x30);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffff);
-{ uint128_t x31 = (x29 + x19);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x30);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffff);
-{ uint128_t x34 = (x32 + x18);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x30);
-{ uint64_t x36 = ((uint64_t)x34 & 0xffffffffffff);
-{ uint128_t x37 = (x35 + x17);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x30);
-{ uint64_t x39 = ((uint64_t)x37 & 0xffffffffffff);
-{ uint128_t x40 = (x38 + x16);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x30);
-{ uint64_t x42 = ((uint64_t)x40 & 0xffffffffffff);
-{ uint128_t x43 = (x41 + x15);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x30);
-{ uint64_t x45 = ((uint64_t)x43 & 0xffffffffffff);
-{ uint64_t x46 = (x24 + (0x13d * x44));
-{ uint64_t x47 = (x46 >> 0x30);
-{ uint64_t x48 = (x46 & 0xffffffffffff);
-{ uint64_t x49 = (x47 + x27);
-{ uint64_t x50 = (x49 >> 0x30);
-{ uint64_t x51 = (x49 & 0xffffffffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
+ { uint128_t x16 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x13d * ((uint128_t)x13 * x13)));
+ { uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x13d * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+ { uint128_t x18 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x13d * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
+ { uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x13d * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x13d * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
+ { uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13d * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+ { uint128_t x22 = (((uint128_t)x2 * x2) + (0x13d * (((uint128_t)x4 * x13) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + ((uint128_t)x13 * x4)))))))));
+ { uint64_t x23 = (uint64_t) (x22 >> 0x30);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffff);
+ { uint128_t x25 = (x23 + x21);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x30);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffffff);
+ { uint128_t x28 = (x26 + x20);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x30);
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffff);
+ { uint128_t x31 = (x29 + x19);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x30);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffffff);
+ { uint128_t x34 = (x32 + x18);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x30);
+ { uint64_t x36 = ((uint64_t)x34 & 0xffffffffffff);
+ { uint128_t x37 = (x35 + x17);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x30);
+ { uint64_t x39 = ((uint64_t)x37 & 0xffffffffffff);
+ { uint128_t x40 = (x38 + x16);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x30);
+ { uint64_t x42 = ((uint64_t)x40 & 0xffffffffffff);
+ { uint128_t x43 = (x41 + x15);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x30);
+ { uint64_t x45 = ((uint64_t)x43 & 0xffffffffffff);
+ { uint64_t x46 = (x24 + (0x13d * x44));
+ { uint64_t x47 = (x46 >> 0x30);
+ { uint64_t x48 = (x46 & 0xffffffffffff);
+ { uint64_t x49 = (x47 + x27);
+ { uint64_t x50 = (x49 >> 0x30);
+ { uint64_t x51 = (x49 & 0xffffffffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e384m317/freeze.c b/src/Specific/solinas64_2e384m317/freeze.c
index 080bf44e2..810814cd5 100644
--- a/src/Specific/solinas64_2e384m317/freeze.c
+++ b/src/Specific/solinas64_2e384m317/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffec3;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffec3);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xfffffffffec3);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e384m5x2e368m1/freeze.c b/src/Specific/solinas64_2e384m5x2e368m1/freeze.c
index b66ff7c62..42303c62a 100644
--- a/src/Specific/solinas64_2e384m5x2e368m1/freeze.c
+++ b/src/Specific/solinas64_2e384m5x2e368m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xfffaffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xfffaffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e384m79x2e376m1/freeze.c b/src/Specific/solinas64_2e384m79x2e376m1/freeze.c
index b66ff7c62..4c264cc11 100644
--- a/src/Specific/solinas64_2e384m79x2e376m1/freeze.c
+++ b/src/Specific/solinas64_2e384m79x2e376m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xb0ffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xb0ffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 48 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e401m31/femul.c b/src/Specific/solinas64_2e401m31/femul.c
index 4ff9c6bda..e9f8d6aef 100644
--- a/src/Specific/solinas64_2e401m31/femul.c
+++ b/src/Specific/solinas64_2e401m31/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)x5 * x30) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((0x2 * ((uint128_t)x17 * x21)) + ((uint128_t)x16 * x19))))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x1f * ((uint128_t)x16 * x30)));
-{ uint128_t x34 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((uint128_t)x15 * x19)))))) + (0x1f * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
-{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x1f * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
-{ uint128_t x36 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (0x1f * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x1f * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
-{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x1f * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
-{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x1f * ((0x2 * ((uint128_t)x7 * x30)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((0x2 * ((uint128_t)x17 * x23)) + (0x2 * ((uint128_t)x16 * x21))))))))));
-{ uint64_t x40 = (uint64_t) (x39 >> 0x33);
-{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
-{ uint128_t x42 = (x40 + x38);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x32);
-{ uint64_t x44 = ((uint64_t)x42 & 0x3ffffffffffff);
-{ uint128_t x45 = (x43 + x37);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x32);
-{ uint64_t x47 = ((uint64_t)x45 & 0x3ffffffffffff);
-{ uint128_t x48 = (x46 + x36);
-{ uint64_t x49 = (uint64_t) (x48 >> 0x32);
-{ uint64_t x50 = ((uint64_t)x48 & 0x3ffffffffffff);
-{ uint128_t x51 = (x49 + x35);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x32);
-{ uint64_t x53 = ((uint64_t)x51 & 0x3ffffffffffff);
-{ uint128_t x54 = (x52 + x34);
-{ uint64_t x55 = (uint64_t) (x54 >> 0x32);
-{ uint64_t x56 = ((uint64_t)x54 & 0x3ffffffffffff);
-{ uint128_t x57 = (x55 + x33);
-{ uint64_t x58 = (uint64_t) (x57 >> 0x32);
-{ uint64_t x59 = ((uint64_t)x57 & 0x3ffffffffffff);
-{ uint128_t x60 = (x58 + x32);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x32);
-{ uint64_t x62 = ((uint64_t)x60 & 0x3ffffffffffff);
-{ uint64_t x63 = (x41 + (0x1f * x61));
-{ uint64_t x64 = (x63 >> 0x33);
-{ uint64_t x65 = (x63 & 0x7ffffffffffff);
-{ uint64_t x66 = (x64 + x44);
-{ uint64_t x67 = (x66 >> 0x32);
-{ uint64_t x68 = (x66 & 0x3ffffffffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)x5 * x30) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((0x2 * ((uint128_t)x17 * x21)) + ((uint128_t)x16 * x19))))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x1f * ((uint128_t)x16 * x30)));
+ { uint128_t x34 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((uint128_t)x15 * x19)))))) + (0x1f * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+ { uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x1f * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
+ { uint128_t x36 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (0x1f * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x1f * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
+ { uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x1f * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+ { uint128_t x39 = (((uint128_t)x5 * x19) + (0x1f * ((0x2 * ((uint128_t)x7 * x30)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((0x2 * ((uint128_t)x17 * x23)) + (0x2 * ((uint128_t)x16 * x21))))))))));
+ { uint64_t x40 = (uint64_t) (x39 >> 0x33);
+ { uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
+ { uint128_t x42 = (x40 + x38);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x32);
+ { uint64_t x44 = ((uint64_t)x42 & 0x3ffffffffffff);
+ { uint128_t x45 = (x43 + x37);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x32);
+ { uint64_t x47 = ((uint64_t)x45 & 0x3ffffffffffff);
+ { uint128_t x48 = (x46 + x36);
+ { uint64_t x49 = (uint64_t) (x48 >> 0x32);
+ { uint64_t x50 = ((uint64_t)x48 & 0x3ffffffffffff);
+ { uint128_t x51 = (x49 + x35);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x32);
+ { uint64_t x53 = ((uint64_t)x51 & 0x3ffffffffffff);
+ { uint128_t x54 = (x52 + x34);
+ { uint64_t x55 = (uint64_t) (x54 >> 0x32);
+ { uint64_t x56 = ((uint64_t)x54 & 0x3ffffffffffff);
+ { uint128_t x57 = (x55 + x33);
+ { uint64_t x58 = (uint64_t) (x57 >> 0x32);
+ { uint64_t x59 = ((uint64_t)x57 & 0x3ffffffffffff);
+ { uint128_t x60 = (x58 + x32);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x32);
+ { uint64_t x62 = ((uint64_t)x60 & 0x3ffffffffffff);
+ { uint64_t x63 = (x41 + (0x1f * x61));
+ { uint64_t x64 = (x63 >> 0x33);
+ { uint64_t x65 = (x63 & 0x7ffffffffffff);
+ { uint64_t x66 = (x64 + x44);
+ { uint64_t x67 = (x66 >> 0x32);
+ { uint64_t x68 = (x66 & 0x3ffffffffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e401m31/fesquare.c b/src/Specific/solinas64_2e401m31/fesquare.c
index b07c6ed53..b36782beb 100644
--- a/src/Specific/solinas64_2e401m31/fesquare.c
+++ b/src/Specific/solinas64_2e401m31/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)x2 * x13) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x13 * x2))))))));
-{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x1f * ((uint128_t)x13 * x13)));
-{ uint128_t x17 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x1f * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
-{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x1f * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
-{ uint128_t x19 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x1f * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1f * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
-{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1f * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
-{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x1f * ((0x2 * ((uint128_t)x4 * x13)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + (0x2 * ((uint128_t)x13 * x4))))))))));
-{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
-{ uint128_t x25 = (x23 + x21);
-{ uint64_t x26 = (uint64_t) (x25 >> 0x32);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
-{ uint128_t x28 = (x26 + x20);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x32);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3ffffffffffff);
-{ uint128_t x31 = (x29 + x19);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x32);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffff);
-{ uint128_t x34 = (x32 + x18);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x32);
-{ uint64_t x36 = ((uint64_t)x34 & 0x3ffffffffffff);
-{ uint128_t x37 = (x35 + x17);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x32);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3ffffffffffff);
-{ uint128_t x40 = (x38 + x16);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x32);
-{ uint64_t x42 = ((uint64_t)x40 & 0x3ffffffffffff);
-{ uint128_t x43 = (x41 + x15);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x32);
-{ uint64_t x45 = ((uint64_t)x43 & 0x3ffffffffffff);
-{ uint64_t x46 = (x24 + (0x1f * x44));
-{ uint64_t x47 = (x46 >> 0x33);
-{ uint64_t x48 = (x46 & 0x7ffffffffffff);
-{ uint64_t x49 = (x47 + x27);
-{ uint64_t x50 = (x49 >> 0x32);
-{ uint64_t x51 = (x49 & 0x3ffffffffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)x2 * x13) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x13 * x2))))))));
+ { uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x1f * ((uint128_t)x13 * x13)));
+ { uint128_t x17 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x1f * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+ { uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x1f * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
+ { uint128_t x19 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x1f * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1f * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
+ { uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1f * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+ { uint128_t x22 = (((uint128_t)x2 * x2) + (0x1f * ((0x2 * ((uint128_t)x4 * x13)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + (0x2 * ((uint128_t)x13 * x4))))))))));
+ { uint64_t x23 = (uint64_t) (x22 >> 0x33);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+ { uint128_t x25 = (x23 + x21);
+ { uint64_t x26 = (uint64_t) (x25 >> 0x32);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
+ { uint128_t x28 = (x26 + x20);
+ { uint64_t x29 = (uint64_t) (x28 >> 0x32);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3ffffffffffff);
+ { uint128_t x31 = (x29 + x19);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x32);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffff);
+ { uint128_t x34 = (x32 + x18);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x32);
+ { uint64_t x36 = ((uint64_t)x34 & 0x3ffffffffffff);
+ { uint128_t x37 = (x35 + x17);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x32);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3ffffffffffff);
+ { uint128_t x40 = (x38 + x16);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x32);
+ { uint64_t x42 = ((uint64_t)x40 & 0x3ffffffffffff);
+ { uint128_t x43 = (x41 + x15);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x32);
+ { uint64_t x45 = ((uint64_t)x43 & 0x3ffffffffffff);
+ { uint64_t x46 = (x24 + (0x1f * x44));
+ { uint64_t x47 = (x46 >> 0x33);
+ { uint64_t x48 = (x46 & 0x7ffffffffffff);
+ { uint64_t x49 = (x47 + x27);
+ { uint64_t x50 = (x49 >> 0x32);
+ { uint64_t x51 = (x49 & 0x3ffffffffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e401m31/freeze.c b/src/Specific/solinas64_2e401m31/freeze.c
index 609dc230b..7801ee0b8 100644
--- a/src/Specific/solinas64_2e401m31/freeze.c
+++ b/src/Specific/solinas64_2e401m31/freeze.c
@@ -1,26 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x16; uint8_t x17 = _subborrow_u51(0x0, x2, 0x7ffffffffffe1, &x16);
-out[0] = uint64_t x19;
-out[1] = uint8_t x20 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 x17;
-out[2] = x4;
-out[3] = 0x3ffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16; uint8_t x17 = _subborrow_u51(0x0, x2, 0x7ffffffffffe1, &x16);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0x3ffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0x3ffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0x3ffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0x3ffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0x3ffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0x3ffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0x3ffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0x7ffffffffffe1);
+ { uint64_t x42; uint8_t x43 = _addcarryx_u51(0x0, x16, x40, &x42);
+ { uint64_t x44 = (x39 & 0x3ffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0x3ffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0x3ffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0x3ffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0x3ffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0x3ffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0x3ffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 50 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e413m21/femul.c b/src/Specific/solinas64_2e413m21/femul.c
index 458c45ca4..6fbb37ad9 100644
--- a/src/Specific/solinas64_2e413m21/femul.c
+++ b/src/Specific/solinas64_2e413m21/femul.c
@@ -1,61 +1,57 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
-{ uint128_t x28 = (((uint128_t)x5 * x26) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + (((uint128_t)x15 * x19) + ((uint128_t)x14 * x17)))))));
-{ uint128_t x29 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x15 * x17)))))) + (0x15 * ((uint128_t)x14 * x26)));
-{ uint128_t x30 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + ((uint128_t)x13 * x17))))) + (0x15 * (((uint128_t)x15 * x26) + ((uint128_t)x14 * x27))));
-{ uint128_t x31 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + ((uint128_t)x11 * x17)))) + (0x15 * (((uint128_t)x13 * x26) + (((uint128_t)x15 * x27) + ((uint128_t)x14 * x25)))));
-{ uint128_t x32 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + ((uint128_t)x9 * x17))) + (0x15 * (((uint128_t)x11 * x26) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x14 * x23))))));
-{ ℤ x33 = ((((uint128_t)x5 * x19) + ((uint128_t)x7 * x17)) +ℤ (0x15 *ℤ (((uint128_t)x9 * x26) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x14 * x21)))))));
-{ ℤ x34 = (((uint128_t)x5 * x17) +ℤ (0x15 *ℤ (((uint128_t)x7 * x26) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + (((uint128_t)x15 * x21) + ((uint128_t)x14 * x19))))))));
-{ uint128_t x35 = (x34 >> 0x3b);
-{ uint64_t x36 = (x34 & 0x7ffffffffffffff);
-{ ℤ x37 = (x35 +ℤ x33);
-{ uint128_t x38 = (x37 >> 0x3b);
-{ uint64_t x39 = (x37 & 0x7ffffffffffffff);
-{ uint128_t x40 = (x38 + x32);
-{ uint128_t x41 = (x40 >> 0x3b);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
-{ uint128_t x43 = (x41 + x31);
-{ uint128_t x44 = (x43 >> 0x3b);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffffff);
-{ uint128_t x46 = (x44 + x30);
-{ uint128_t x47 = (x46 >> 0x3b);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffffff);
-{ uint128_t x49 = (x47 + x29);
-{ uint128_t x50 = (x49 >> 0x3b);
-{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffffff);
-{ uint128_t x52 = (x50 + x28);
-{ uint128_t x53 = (x52 >> 0x3b);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffffff);
-{ uint128_t x55 = (x36 + (0x15 * x53));
-{ uint64_t x56 = (uint64_t) (x55 >> 0x3b);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffffff);
-{ uint64_t x58 = (x56 + x39);
-{ uint64_t x59 = (x58 >> 0x3b);
-{ uint64_t x60 = (x58 & 0x7ffffffffffffff);
-out[0] = x54;
-out[1] = x51;
-out[2] = x48;
-out[3] = x45;
-out[4] = x59 + x42;
-out[5] = x60;
-out[6] = x57;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void femul(uint64_t out[7], const uint64_t in1[7], const uint64_t in2[7]) {
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x26 = in2[6];
+ { const uint64_t x27 = in2[5];
+ { const uint64_t x25 = in2[4];
+ { const uint64_t x23 = in2[3];
+ { const uint64_t x21 = in2[2];
+ { const uint64_t x19 = in2[1];
+ { const uint64_t x17 = in2[0];
+ { uint128_t x28 = (((uint128_t)x5 * x26) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + (((uint128_t)x15 * x19) + ((uint128_t)x14 * x17)))))));
+ { uint128_t x29 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x15 * x17)))))) + (0x15 * ((uint128_t)x14 * x26)));
+ { uint128_t x30 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + ((uint128_t)x13 * x17))))) + (0x15 * (((uint128_t)x15 * x26) + ((uint128_t)x14 * x27))));
+ { uint128_t x31 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + ((uint128_t)x11 * x17)))) + (0x15 * (((uint128_t)x13 * x26) + (((uint128_t)x15 * x27) + ((uint128_t)x14 * x25)))));
+ { uint128_t x32 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + ((uint128_t)x9 * x17))) + (0x15 * (((uint128_t)x11 * x26) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x14 * x23))))));
+ { ℤ x33 = ((((uint128_t)x5 * x19) + ((uint128_t)x7 * x17)) +ℤ (0x15 *ℤ (((uint128_t)x9 * x26) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x14 * x21)))))));
+ { ℤ x34 = (((uint128_t)x5 * x17) +ℤ (0x15 *ℤ (((uint128_t)x7 * x26) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + (((uint128_t)x15 * x21) + ((uint128_t)x14 * x19))))))));
+ { uint128_t x35 = (x34 >> 0x3b);
+ { uint64_t x36 = (x34 & 0x7ffffffffffffff);
+ { ℤ x37 = (x35 +ℤ x33);
+ { uint128_t x38 = (x37 >> 0x3b);
+ { uint64_t x39 = (x37 & 0x7ffffffffffffff);
+ { uint128_t x40 = (x38 + x32);
+ { uint128_t x41 = (x40 >> 0x3b);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
+ { uint128_t x43 = (x41 + x31);
+ { uint128_t x44 = (x43 >> 0x3b);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffffff);
+ { uint128_t x46 = (x44 + x30);
+ { uint128_t x47 = (x46 >> 0x3b);
+ { uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffffff);
+ { uint128_t x49 = (x47 + x29);
+ { uint128_t x50 = (x49 >> 0x3b);
+ { uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffffff);
+ { uint128_t x52 = (x50 + x28);
+ { uint128_t x53 = (x52 >> 0x3b);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffffff);
+ { uint128_t x55 = (x36 + (0x15 * x53));
+ { uint64_t x56 = (uint64_t) (x55 >> 0x3b);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffffff);
+ { uint64_t x58 = (x56 + x39);
+ { uint64_t x59 = (x58 >> 0x3b);
+ { uint64_t x60 = (x58 & 0x7ffffffffffffff);
+ out[0] = x57;
+ out[1] = x60;
+ out[2] = (x59 + x42);
+ out[3] = x45;
+ out[4] = x48;
+ out[5] = x51;
+ out[6] = x54;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e413m21/fesquare.c b/src/Specific/solinas64_2e413m21/fesquare.c
index 0f513bb27..c3e0d7d32 100644
--- a/src/Specific/solinas64_2e413m21/fesquare.c
+++ b/src/Specific/solinas64_2e413m21/fesquare.c
@@ -1,61 +1,50 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x13 = (((uint128_t)x2 * x11) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x11 * x2)))))));
-{ uint128_t x14 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x15 * ((uint128_t)x11 * x11)));
-{ uint128_t x15 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x15 * (((uint128_t)x12 * x11) + ((uint128_t)x11 * x12))));
-{ uint128_t x16 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x15 * (((uint128_t)x10 * x11) + (((uint128_t)x12 * x12) + ((uint128_t)x11 * x10)))));
-{ uint128_t x17 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x15 * (((uint128_t)x8 * x11) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((uint128_t)x11 * x8))))));
-{ ℤ x18 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) +ℤ (0x15 *ℤ (((uint128_t)x6 * x11) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((uint128_t)x11 * x6)))))));
-{ ℤ x19 = (((uint128_t)x2 * x2) +ℤ (0x15 *ℤ (((uint128_t)x4 * x11) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((uint128_t)x11 * x4))))))));
-{ uint128_t x20 = (x19 >> 0x3b);
-{ uint64_t x21 = (x19 & 0x7ffffffffffffff);
-{ ℤ x22 = (x20 +ℤ x18);
-{ uint128_t x23 = (x22 >> 0x3b);
-{ uint64_t x24 = (x22 & 0x7ffffffffffffff);
-{ uint128_t x25 = (x23 + x17);
-{ uint128_t x26 = (x25 >> 0x3b);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffffff);
-{ uint128_t x28 = (x26 + x16);
-{ uint128_t x29 = (x28 >> 0x3b);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
-{ uint128_t x31 = (x29 + x15);
-{ uint128_t x32 = (x31 >> 0x3b);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffffff);
-{ uint128_t x34 = (x32 + x14);
-{ uint128_t x35 = (x34 >> 0x3b);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffffff);
-{ uint128_t x37 = (x35 + x13);
-{ uint128_t x38 = (x37 >> 0x3b);
-{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffffff);
-{ uint128_t x40 = (x21 + (0x15 * x38));
-{ uint64_t x41 = (uint64_t) (x40 >> 0x3b);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
-{ uint64_t x43 = (x41 + x24);
-{ uint64_t x44 = (x43 >> 0x3b);
-{ uint64_t x45 = (x43 & 0x7ffffffffffffff);
-out[0] = x39;
-out[1] = x36;
-out[2] = x33;
-out[3] = x30;
-out[4] = x44 + x27;
-out[5] = x45;
-out[6] = x42;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[7];
+static void fesquare(uint64_t out[7], const uint64_t in1[7]) {
+ { const uint64_t x11 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x13 = (((uint128_t)x2 * x11) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x11 * x2)))))));
+ { uint128_t x14 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x15 * ((uint128_t)x11 * x11)));
+ { uint128_t x15 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x15 * (((uint128_t)x12 * x11) + ((uint128_t)x11 * x12))));
+ { uint128_t x16 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x15 * (((uint128_t)x10 * x11) + (((uint128_t)x12 * x12) + ((uint128_t)x11 * x10)))));
+ { uint128_t x17 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x15 * (((uint128_t)x8 * x11) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((uint128_t)x11 * x8))))));
+ { ℤ x18 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) +ℤ (0x15 *ℤ (((uint128_t)x6 * x11) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((uint128_t)x11 * x6)))))));
+ { ℤ x19 = (((uint128_t)x2 * x2) +ℤ (0x15 *ℤ (((uint128_t)x4 * x11) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((uint128_t)x11 * x4))))))));
+ { uint128_t x20 = (x19 >> 0x3b);
+ { uint64_t x21 = (x19 & 0x7ffffffffffffff);
+ { ℤ x22 = (x20 +ℤ x18);
+ { uint128_t x23 = (x22 >> 0x3b);
+ { uint64_t x24 = (x22 & 0x7ffffffffffffff);
+ { uint128_t x25 = (x23 + x17);
+ { uint128_t x26 = (x25 >> 0x3b);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffffff);
+ { uint128_t x28 = (x26 + x16);
+ { uint128_t x29 = (x28 >> 0x3b);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
+ { uint128_t x31 = (x29 + x15);
+ { uint128_t x32 = (x31 >> 0x3b);
+ { uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffffff);
+ { uint128_t x34 = (x32 + x14);
+ { uint128_t x35 = (x34 >> 0x3b);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffffff);
+ { uint128_t x37 = (x35 + x13);
+ { uint128_t x38 = (x37 >> 0x3b);
+ { uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffffff);
+ { uint128_t x40 = (x21 + (0x15 * x38));
+ { uint64_t x41 = (uint64_t) (x40 >> 0x3b);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
+ { uint64_t x43 = (x41 + x24);
+ { uint64_t x44 = (x43 >> 0x3b);
+ { uint64_t x45 = (x43 & 0x7ffffffffffffff);
+ out[0] = x42;
+ out[1] = x45;
+ out[2] = (x44 + x27);
+ out[3] = x30;
+ out[4] = x33;
+ out[5] = x36;
+ out[6] = x39;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e413m21/freeze.c b/src/Specific/solinas64_2e413m21/freeze.c
index 3b53d6ed6..82ad82e84 100644
--- a/src/Specific/solinas64_2e413m21/freeze.c
+++ b/src/Specific/solinas64_2e413m21/freeze.c
@@ -1,25 +1,39 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x14;
-out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 59 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffffffeb;;
+static void freeze(uint64_t out[7], const uint64_t in1[7]) { /* Reads 7 limbs from in1, runs a borrow-chained subtraction of fixed constants, then a carry-chained add-back of masked constants, and stores 7 limbs in out. NOTE(review): the `Op (Syntax....)` initializers and the paired `uint64_t a, uint8_t b = ...` declarations below are not valid C as written; they look like un-lowered operations printed verbatim by the generator. */
+ { const uint64_t x11 = in1[6]; /* in1 limbs are read from index 6 down to index 0 */
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x14, uint8_t x15 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffffffeb); /* operands: 0x0, in1[0], and 0x7ffffffffffffeb (= 2^59 - 21); presumably x14 is the difference and x15 the borrow-out -- confirm against the definition of SubWithGetBorrow */
+ { uint64_t x17, uint8_t x18 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x15, Return x4, 0x7ffffffffffffff); /* same op on in1[1], fed x15 from the previous step; this and every higher limb use the constant 0x7ffffffffffffff (= 2^59 - 1). If the limbs are radix 2^59, as the `59` operand suggests, the seven constants together spell 2^413 - 21, matching the directory name. */
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x18, Return x6, 0x7ffffffffffffff);
+ { uint64_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x21, Return x8, 0x7ffffffffffffff);
+ { uint64_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x24, Return x10, 0x7ffffffffffffff);
+ { uint64_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x12, 0x7ffffffffffffff);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x30, Return x11, 0x7ffffffffffffff); /* top limb in1[6]; x33 is the last value produced by the chain */
+ { uint64_t x34 = (uint64_t)cmovznz(x33, 0x0, 0xffffffffffffffffL); /* x34 is chosen between 0x0 and all-ones based on x33; NOTE(review): which argument cmovznz returns for a nonzero x33 is not visible here -- confirm */
+ { uint64_t x35 = (x34 & 0x7ffffffffffffeb); /* x34 masked with the limb-0 constant */
+ { uint64_t x37, uint8_t x38 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x14, Return x35); /* add-back chain starts: operands 0x0, x14 and x35 */
+ { uint64_t x39 = (x34 & 0x7ffffffffffffff); /* x39, x43, x47, x51, x55 and x59 are all the same expression, x34 & 0x7ffffffffffffff */
+ { uint64_t x41, uint8_t x42 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x38, Return x17, Return x39);
+ { uint64_t x43 = (x34 & 0x7ffffffffffffff);
+ { uint64_t x45, uint8_t x46 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x20, Return x43);
+ { uint64_t x47 = (x34 & 0x7ffffffffffffff);
+ { uint64_t x49, uint8_t x50 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x46, Return x23, Return x47);
+ { uint64_t x51 = (x34 & 0x7ffffffffffffff);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x50, Return x26, Return x51);
+ { uint64_t x55 = (x34 & 0x7ffffffffffffff);
+ { uint64_t x57, uint8_t x58 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x54, Return x29, Return x55);
+ { uint64_t x59 = (x34 & 0x7ffffffffffffff);
+ { uint64_t x61, uint8_t _ = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x58, Return x32, Return x59); /* the second result of the last step is bound to `_` and never read */
+ out[0] = x37; /* the seven add-back results are stored with index 0 first */
+ out[1] = x41;
+ out[2] = x45;
+ out[3] = x49;
+ out[4] = x53;
+ out[5] = x57;
+ out[6] = x61;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e414m17/femul.c b/src/Specific/solinas64_2e414m17/femul.c
index 7d8d48e65..1a394a727 100644
--- a/src/Specific/solinas64_2e414m17/femul.c
+++ b/src/Specific/solinas64_2e414m17/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint128_t x36 = (((uint128_t)x5 * x34) + (((uint128_t)x7 * x35) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + (((uint128_t)x19 * x23) + ((uint128_t)x18 * x21)))))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x11 * ((uint128_t)x18 * x34)));
-{ uint128_t x38 = ((((uint128_t)x5 * x33) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x17 * x21))))))) + (0x11 * (((uint128_t)x19 * x34) + ((uint128_t)x18 * x35))));
-{ uint128_t x39 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + ((uint128_t)x15 * x21)))))) + (0x11 * (((uint128_t)x17 * x34) + (((uint128_t)x19 * x35) + ((uint128_t)x18 * x33)))));
-{ uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x11 * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21)))) + (0x11 * (((uint128_t)x13 * x34) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + ((uint128_t)x18 * x29)))))));
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + ((uint128_t)x9 * x21))) + (0x11 * (((uint128_t)x11 * x34) + (((uint128_t)x13 * x35) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + (((uint128_t)x19 * x29) + ((uint128_t)x18 * x27))))))));
-{ uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x11 * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
-{ uint128_t x44 = (((uint128_t)x5 * x21) + (0x11 * (((uint128_t)x7 * x34) + (((uint128_t)x9 * x35) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + (((uint128_t)x19 * x25) + ((uint128_t)x18 * x23))))))))));
-{ uint64_t x45 = (uint64_t) (x44 >> 0x2e);
-{ uint64_t x46 = ((uint64_t)x44 & 0x3fffffffffff);
-{ uint128_t x47 = (x45 + x43);
-{ uint64_t x48 = (uint64_t) (x47 >> 0x2e);
-{ uint64_t x49 = ((uint64_t)x47 & 0x3fffffffffff);
-{ uint128_t x50 = (x48 + x42);
-{ uint64_t x51 = (uint64_t) (x50 >> 0x2e);
-{ uint64_t x52 = ((uint64_t)x50 & 0x3fffffffffff);
-{ uint128_t x53 = (x51 + x41);
-{ uint64_t x54 = (uint64_t) (x53 >> 0x2e);
-{ uint64_t x55 = ((uint64_t)x53 & 0x3fffffffffff);
-{ uint128_t x56 = (x54 + x40);
-{ uint64_t x57 = (uint64_t) (x56 >> 0x2e);
-{ uint64_t x58 = ((uint64_t)x56 & 0x3fffffffffff);
-{ uint128_t x59 = (x57 + x39);
-{ uint64_t x60 = (uint64_t) (x59 >> 0x2e);
-{ uint64_t x61 = ((uint64_t)x59 & 0x3fffffffffff);
-{ uint128_t x62 = (x60 + x38);
-{ uint64_t x63 = (uint64_t) (x62 >> 0x2e);
-{ uint64_t x64 = ((uint64_t)x62 & 0x3fffffffffff);
-{ uint128_t x65 = (x63 + x37);
-{ uint64_t x66 = (uint64_t) (x65 >> 0x2e);
-{ uint64_t x67 = ((uint64_t)x65 & 0x3fffffffffff);
-{ uint128_t x68 = (x66 + x36);
-{ uint64_t x69 = (uint64_t) (x68 >> 0x2e);
-{ uint64_t x70 = ((uint64_t)x68 & 0x3fffffffffff);
-{ uint64_t x71 = (x46 + (0x11 * x69));
-{ uint64_t x72 = (x71 >> 0x2e);
-{ uint64_t x73 = (x71 & 0x3fffffffffff);
-{ uint64_t x74 = (x72 + x49);
-{ uint64_t x75 = (x74 >> 0x2e);
-{ uint64_t x76 = (x74 & 0x3fffffffffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint64_t out[9], const uint64_t in1[9], const uint64_t in2[9]) { /* Multiplies two 9-limb values: builds nine 128-bit column sums, carries them in 46-bit steps (shift 0x2e, mask 0x3fffffffffff), and stores nine uint64_t limbs in out. Products whose limb indices sum to 9 or more are scaled by 0x11 (17) and added into the column nine places lower, which is consistent with arithmetic modulo the 2^414 - 17 named in the directory path. uint128_t is not declared in these added lines (its typedef is among the removed lines), so it has to be provided elsewhere. */
+ { const uint64_t x18 = in1[8]; /* in1 limbs are read from index 8 down to index 0 */
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x34 = in2[8]; /* in2 limbs, likewise from index 8 down to index 0 */
+ { const uint64_t x35 = in2[7];
+ { const uint64_t x33 = in2[6];
+ { const uint64_t x31 = in2[5];
+ { const uint64_t x29 = in2[4];
+ { const uint64_t x27 = in2[3];
+ { const uint64_t x25 = in2[2];
+ { const uint64_t x23 = in2[1];
+ { const uint64_t x21 = in2[0];
+ { uint128_t x36 = (((uint128_t)x5 * x34) + (((uint128_t)x7 * x35) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + (((uint128_t)x19 * x23) + ((uint128_t)x18 * x21))))))))); /* column 8: every in1[i] * in2[j] with i + j == 8 */
+ { uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x11 * ((uint128_t)x18 * x34))); /* column 7: i + j == 7, plus 0x11 times the single i + j == 16 product */
+ { uint128_t x38 = ((((uint128_t)x5 * x33) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x17 * x21))))))) + (0x11 * (((uint128_t)x19 * x34) + ((uint128_t)x18 * x35)))); /* column 6; columns 5 down to 1 follow the same shape, each with one fewer direct product and one more 0x11-scaled product */
+ { uint128_t x39 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + ((uint128_t)x15 * x21)))))) + (0x11 * (((uint128_t)x17 * x34) + (((uint128_t)x19 * x35) + ((uint128_t)x18 * x33)))));
+ { uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x11 * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21)))) + (0x11 * (((uint128_t)x13 * x34) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + ((uint128_t)x18 * x29)))))));
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + ((uint128_t)x9 * x21))) + (0x11 * (((uint128_t)x11 * x34) + (((uint128_t)x13 * x35) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + (((uint128_t)x19 * x29) + ((uint128_t)x18 * x27))))))));
+ { uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x11 * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
+ { uint128_t x44 = (((uint128_t)x5 * x21) + (0x11 * (((uint128_t)x7 * x34) + (((uint128_t)x9 * x35) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + (((uint128_t)x19 * x25) + ((uint128_t)x18 * x23)))))))))); /* column 0: in1[0] * in2[0], plus 0x11 times every i + j == 9 product */
+ { uint64_t x45 = (uint64_t) (x44 >> 0x2e); /* carry out of column 0; NOTE(review): the cast keeps only the low 64 bits of the shifted value -- presumably input bounds make that lossless, but those bounds are not visible here */
+ { uint64_t x46 = ((uint64_t)x44 & 0x3fffffffffff); /* low 46 bits of column 0 */
+ { uint128_t x47 = (x45 + x43); /* column 1 plus the carry from column 0; the same add / shift / mask triple repeats up through column 8 */
+ { uint64_t x48 = (uint64_t) (x47 >> 0x2e);
+ { uint64_t x49 = ((uint64_t)x47 & 0x3fffffffffff);
+ { uint128_t x50 = (x48 + x42);
+ { uint64_t x51 = (uint64_t) (x50 >> 0x2e);
+ { uint64_t x52 = ((uint64_t)x50 & 0x3fffffffffff);
+ { uint128_t x53 = (x51 + x41);
+ { uint64_t x54 = (uint64_t) (x53 >> 0x2e);
+ { uint64_t x55 = ((uint64_t)x53 & 0x3fffffffffff);
+ { uint128_t x56 = (x54 + x40);
+ { uint64_t x57 = (uint64_t) (x56 >> 0x2e);
+ { uint64_t x58 = ((uint64_t)x56 & 0x3fffffffffff);
+ { uint128_t x59 = (x57 + x39);
+ { uint64_t x60 = (uint64_t) (x59 >> 0x2e);
+ { uint64_t x61 = ((uint64_t)x59 & 0x3fffffffffff);
+ { uint128_t x62 = (x60 + x38);
+ { uint64_t x63 = (uint64_t) (x62 >> 0x2e);
+ { uint64_t x64 = ((uint64_t)x62 & 0x3fffffffffff);
+ { uint128_t x65 = (x63 + x37);
+ { uint64_t x66 = (uint64_t) (x65 >> 0x2e);
+ { uint64_t x67 = ((uint64_t)x65 & 0x3fffffffffff);
+ { uint128_t x68 = (x66 + x36); /* column 8 plus the carry from column 7 */
+ { uint64_t x69 = (uint64_t) (x68 >> 0x2e); /* carry out of the top column */
+ { uint64_t x70 = ((uint64_t)x68 & 0x3fffffffffff);
+ { uint64_t x71 = (x46 + (0x11 * x69)); /* the top carry is scaled by 0x11 and added to the low part of column 0 */
+ { uint64_t x72 = (x71 >> 0x2e); /* second, short carry pass: limb 0 into limb 1 ... */
+ { uint64_t x73 = (x71 & 0x3fffffffffff);
+ { uint64_t x74 = (x72 + x49);
+ { uint64_t x75 = (x74 >> 0x2e); /* ... and limb 1 into limb 2 */
+ { uint64_t x76 = (x74 & 0x3fffffffffff);
+ out[0] = x73; /* results are stored with limb 0 first */
+ out[1] = x76;
+ out[2] = (x75 + x52); /* limb 2 absorbs the carry x75 and is not masked again */
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e414m17/fesquare.c b/src/Specific/solinas64_2e414m17/fesquare.c
index d07123153..596e43cf7 100644
--- a/src/Specific/solinas64_2e414m17/fesquare.c
+++ b/src/Specific/solinas64_2e414m17/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x17 = (((uint128_t)x2 * x15) + (((uint128_t)x4 * x16) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + (((uint128_t)x16 * x4) + ((uint128_t)x15 * x2)))))))));
-{ uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * ((uint128_t)x15 * x15)));
-{ uint128_t x19 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)x16 * x15) + ((uint128_t)x15 * x16))));
-{ uint128_t x20 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x15) + (((uint128_t)x16 * x16) + ((uint128_t)x15 * x14)))));
-{ uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
-{ uint128_t x22 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x15) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + ((uint128_t)x15 * x10)))))));
-{ uint128_t x23 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)x8 * x15) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + ((uint128_t)x15 * x8))))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
-{ uint128_t x25 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x15) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + ((uint128_t)x15 * x4))))))))));
-{ uint64_t x26 = (uint64_t) (x25 >> 0x2e);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffff);
-{ uint128_t x28 = (x26 + x24);
-{ uint64_t x29 = (uint64_t) (x28 >> 0x2e);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffff);
-{ uint128_t x31 = (x29 + x23);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x2e);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffff);
-{ uint128_t x34 = (x32 + x22);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x2e);
-{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffff);
-{ uint128_t x37 = (x35 + x21);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x2e);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffff);
-{ uint128_t x40 = (x38 + x20);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x2e);
-{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffff);
-{ uint128_t x43 = (x41 + x19);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x2e);
-{ uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffff);
-{ uint128_t x46 = (x44 + x18);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x2e);
-{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffff);
-{ uint128_t x49 = (x47 + x17);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x2e);
-{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffff);
-{ uint64_t x52 = (x27 + (0x11 * x50));
-{ uint64_t x53 = (x52 >> 0x2e);
-{ uint64_t x54 = (x52 & 0x3fffffffffff);
-{ uint64_t x55 = (x53 + x30);
-{ uint64_t x56 = (x55 >> 0x2e);
-{ uint64_t x57 = (x55 & 0x3fffffffffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint64_t out[9], const uint64_t in1[9]) { /* Squares a 9-limb value: builds nine 128-bit column sums of in1[i] * in1[j], carries them in 46-bit steps (shift 0x2e, mask 0x3fffffffffff), and stores nine uint64_t limbs in out. Products whose limb indices sum to 9 or more are scaled by 0x11 (17) and added into the column nine places lower, which is consistent with arithmetic modulo the 2^414 - 17 named in the directory path. uint128_t is not declared in these added lines (its typedef is among the removed lines), so it has to be provided elsewhere. */
+ { const uint64_t x15 = in1[8]; /* in1 limbs are read from index 8 down to index 0 */
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x17 = (((uint128_t)x2 * x15) + (((uint128_t)x4 * x16) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + (((uint128_t)x16 * x4) + ((uint128_t)x15 * x2))))))))); /* column 8: every in1[i] * in1[j] with i + j == 8; symmetric products are written out twice rather than doubled */
+ { uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * ((uint128_t)x15 * x15))); /* column 7: i + j == 7, plus 0x11 times in1[8] * in1[8] */
+ { uint128_t x19 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)x16 * x15) + ((uint128_t)x15 * x16)))); /* column 6; columns 5 down to 1 follow the same shape */
+ { uint128_t x20 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x15) + (((uint128_t)x16 * x16) + ((uint128_t)x15 * x14)))));
+ { uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
+ { uint128_t x22 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x15) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + ((uint128_t)x15 * x10)))))));
+ { uint128_t x23 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)x8 * x15) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + ((uint128_t)x15 * x8))))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
+ { uint128_t x25 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x15) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + ((uint128_t)x15 * x4)))))))))); /* column 0: in1[0] * in1[0], plus 0x11 times every i + j == 9 product */
+ { uint64_t x26 = (uint64_t) (x25 >> 0x2e); /* carry out of column 0; NOTE(review): the cast keeps only the low 64 bits of the shifted value -- presumably input bounds make that lossless, but those bounds are not visible here */
+ { uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffff); /* low 46 bits of column 0 */
+ { uint128_t x28 = (x26 + x24); /* column 1 plus the carry from column 0; the same add / shift / mask triple repeats up through column 8 */
+ { uint64_t x29 = (uint64_t) (x28 >> 0x2e);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffff);
+ { uint128_t x31 = (x29 + x23);
+ { uint64_t x32 = (uint64_t) (x31 >> 0x2e);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffff);
+ { uint128_t x34 = (x32 + x22);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x2e);
+ { uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffff);
+ { uint128_t x37 = (x35 + x21);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x2e);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffff);
+ { uint128_t x40 = (x38 + x20);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x2e);
+ { uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffff);
+ { uint128_t x43 = (x41 + x19);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x2e);
+ { uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffff);
+ { uint128_t x46 = (x44 + x18);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x2e);
+ { uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffff);
+ { uint128_t x49 = (x47 + x17); /* column 8 plus the carry from column 7 */
+ { uint64_t x50 = (uint64_t) (x49 >> 0x2e); /* carry out of the top column */
+ { uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffff);
+ { uint64_t x52 = (x27 + (0x11 * x50)); /* the top carry is scaled by 0x11 and added to the low part of column 0 */
+ { uint64_t x53 = (x52 >> 0x2e); /* second, short carry pass: limb 0 into limb 1 ... */
+ { uint64_t x54 = (x52 & 0x3fffffffffff);
+ { uint64_t x55 = (x53 + x30);
+ { uint64_t x56 = (x55 >> 0x2e); /* ... and limb 1 into limb 2 */
+ { uint64_t x57 = (x55 & 0x3fffffffffff);
+ out[0] = x54; /* results are stored with limb 0 first */
+ out[1] = x57;
+ out[2] = (x56 + x33); /* limb 2 absorbs the carry x56 and is not masked again */
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e414m17/freeze.c b/src/Specific/solinas64_2e414m17/freeze.c
index 9098c4e42..10cbf785a 100644
--- a/src/Specific/solinas64_2e414m17/freeze.c
+++ b/src/Specific/solinas64_2e414m17/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 46 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x3fffffffffef;;
+static void freeze(uint64_t out[9], const uint64_t in1[9]) { /* Reads 9 limbs from in1, runs a borrow-chained subtraction of fixed constants, then a carry-chained add-back of masked constants, and stores 9 limbs in out. NOTE(review): the `Op (Syntax....)` initializers and the paired `uint64_t a, uint8_t b = ...` declarations below are not valid C as written; they look like un-lowered operations printed verbatim by the generator. */
+ { const uint64_t x15 = in1[8]; /* in1 limbs are read from index 8 down to index 0 */
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffffffef); /* operands: 0x0, in1[0], and 0x3fffffffffef (= 2^46 - 17); presumably x18 is the difference and x19 the borrow-out -- confirm against the definition of SubWithGetBorrow */
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x4, 0x3fffffffffff); /* same op on in1[1], fed x19 from the previous step; this and every higher limb use the constant 0x3fffffffffff (= 2^46 - 1). If the limbs are radix 2^46, as the `46` operand suggests, the nine constants together spell 2^414 - 17, matching the directory name. */
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x6, 0x3fffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x8, 0x3fffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x10, 0x3fffffffffff);
+ { uint64_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x12, 0x3fffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x34, Return x14, 0x3fffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x16, 0x3fffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x15, 0x3fffffffffff); /* top limb in1[8]; x43 is the last value produced by the chain */
+ { uint64_t x44 = (uint64_t)cmovznz(x43, 0x0, 0xffffffffffffffffL); /* x44 is chosen between 0x0 and all-ones based on x43; NOTE(review): which argument cmovznz returns for a nonzero x43 is not visible here -- confirm */
+ { uint64_t x45 = (x44 & 0x3fffffffffef); /* x44 masked with the limb-0 constant */
+ { uint64_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x18, Return x45); /* add-back chain starts: operands 0x0, x18 and x45 */
+ { uint64_t x49 = (x44 & 0x3fffffffffff); /* x49, x53, x57, x61, x65, x69, x73 and x77 are all the same expression, x44 & 0x3fffffffffff */
+ { uint64_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint64_t x53 = (x44 & 0x3fffffffffff);
+ { uint64_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint64_t x57 = (x44 & 0x3fffffffffff);
+ { uint64_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint64_t x61 = (x44 & 0x3fffffffffff);
+ { uint64_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint64_t x65 = (x44 & 0x3fffffffffff);
+ { uint64_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint64_t x69 = (x44 & 0x3fffffffffff);
+ { uint64_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint64_t x73 = (x44 & 0x3fffffffffff);
+ { uint64_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint64_t x77 = (x44 & 0x3fffffffffff);
+ { uint64_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 46 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x76, Return x42, Return x77); /* the second result of the last step is bound to `_` and never read */
+ out[0] = x47; /* the nine add-back results are stored with index 0 first */
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e416m2e208m1/femul.c b/src/Specific/solinas64_2e416m2e208m1/femul.c
index fc04758dc..f86ca1d3f 100644
--- a/src/Specific/solinas64_2e416m2e208m1/femul.c
+++ b/src/Specific/solinas64_2e416m2e208m1/femul.c
@@ -1,83 +1,81 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
-{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
-{ uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
-{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
-{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
-{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
-{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
-{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
-{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
-{ uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
-{ uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
-{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
-{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
-{ uint64_t x46 = (uint64_t) (x42 >> 0x34);
-{ uint64_t x47 = ((uint64_t)x42 & 0xfffffffffffff);
-{ uint64_t x48 = (uint64_t) (x35 >> 0x34);
-{ uint64_t x49 = ((uint64_t)x35 & 0xfffffffffffff);
-{ uint128_t x50 = (((uint128_t)0x10000000000000 * x48) + x49);
-{ uint64_t x51 = (uint64_t) (x50 >> 0x34);
-{ uint64_t x52 = ((uint64_t)x50 & 0xfffffffffffff);
-{ uint128_t x53 = ((x46 + x41) + x51);
-{ uint64_t x54 = (uint64_t) (x53 >> 0x34);
-{ uint64_t x55 = ((uint64_t)x53 & 0xfffffffffffff);
-{ uint128_t x56 = (x45 + x51);
-{ uint64_t x57 = (uint64_t) (x56 >> 0x34);
-{ uint64_t x58 = ((uint64_t)x56 & 0xfffffffffffff);
-{ uint128_t x59 = (x54 + x40);
-{ uint64_t x60 = (uint64_t) (x59 >> 0x34);
-{ uint64_t x61 = ((uint64_t)x59 & 0xfffffffffffff);
-{ uint128_t x62 = (x57 + x44);
-{ uint64_t x63 = (uint64_t) (x62 >> 0x34);
-{ uint64_t x64 = ((uint64_t)x62 & 0xfffffffffffff);
-{ uint128_t x65 = (x60 + x39);
-{ uint64_t x66 = (uint64_t) (x65 >> 0x34);
-{ uint64_t x67 = ((uint64_t)x65 & 0xfffffffffffff);
-{ uint128_t x68 = (x63 + x43);
-{ uint64_t x69 = (uint64_t) (x68 >> 0x34);
-{ uint64_t x70 = ((uint64_t)x68 & 0xfffffffffffff);
-{ uint64_t x71 = (x66 + x52);
-{ uint64_t x72 = (x71 >> 0x34);
-{ uint64_t x73 = (x71 & 0xfffffffffffff);
-{ uint64_t x74 = (x69 + x47);
-{ uint64_t x75 = (x74 >> 0x34);
-{ uint64_t x76 = (x74 & 0xfffffffffffff);
-{ uint64_t x77 = ((0x10000000000000 * x72) + x73);
-{ uint64_t x78 = (x77 >> 0x34);
-{ uint64_t x79 = (x77 & 0xfffffffffffff);
-{ uint64_t x80 = ((x75 + x55) + x78);
-{ uint64_t x81 = (x80 >> 0x34);
-{ uint64_t x82 = (x80 & 0xfffffffffffff);
-{ uint64_t x83 = (x58 + x78);
-{ uint64_t x84 = (x83 >> 0x34);
-{ uint64_t x85 = (x83 & 0xfffffffffffff);
-out[0] = x79;
-out[1] = x67;
-out[2] = x81 + x61;
-out[3] = x82;
-out[4] = x76;
-out[5] = x70;
-out[6] = x84 + x64;
-out[7] = x85;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
+ { uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+ { uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
+ { uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
+ { uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
+ { uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+ { uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+ { uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
+ { uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+ { uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
+ { uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
+ { uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+ { uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+ { uint64_t x46 = (uint64_t) (x42 >> 0x34);
+ { uint64_t x47 = ((uint64_t)x42 & 0xfffffffffffff);
+ { uint64_t x48 = (uint64_t) (x35 >> 0x34);
+ { uint64_t x49 = ((uint64_t)x35 & 0xfffffffffffff);
+ { uint128_t x50 = (((uint128_t)0x10000000000000 * x48) + x49);
+ { uint64_t x51 = (uint64_t) (x50 >> 0x34);
+ { uint64_t x52 = ((uint64_t)x50 & 0xfffffffffffff);
+ { uint128_t x53 = ((x46 + x41) + x51);
+ { uint64_t x54 = (uint64_t) (x53 >> 0x34);
+ { uint64_t x55 = ((uint64_t)x53 & 0xfffffffffffff);
+ { uint128_t x56 = (x45 + x51);
+ { uint64_t x57 = (uint64_t) (x56 >> 0x34);
+ { uint64_t x58 = ((uint64_t)x56 & 0xfffffffffffff);
+ { uint128_t x59 = (x54 + x40);
+ { uint64_t x60 = (uint64_t) (x59 >> 0x34);
+ { uint64_t x61 = ((uint64_t)x59 & 0xfffffffffffff);
+ { uint128_t x62 = (x57 + x44);
+ { uint64_t x63 = (uint64_t) (x62 >> 0x34);
+ { uint64_t x64 = ((uint64_t)x62 & 0xfffffffffffff);
+ { uint128_t x65 = (x60 + x39);
+ { uint64_t x66 = (uint64_t) (x65 >> 0x34);
+ { uint64_t x67 = ((uint64_t)x65 & 0xfffffffffffff);
+ { uint128_t x68 = (x63 + x43);
+ { uint64_t x69 = (uint64_t) (x68 >> 0x34);
+ { uint64_t x70 = ((uint64_t)x68 & 0xfffffffffffff);
+ { uint64_t x71 = (x66 + x52);
+ { uint64_t x72 = (x71 >> 0x34);
+ { uint64_t x73 = (x71 & 0xfffffffffffff);
+ { uint64_t x74 = (x69 + x47);
+ { uint64_t x75 = (x74 >> 0x34);
+ { uint64_t x76 = (x74 & 0xfffffffffffff);
+ { uint64_t x77 = ((0x10000000000000 * x72) + x73);
+ { uint64_t x78 = (x77 >> 0x34);
+ { uint64_t x79 = (x77 & 0xfffffffffffff);
+ { uint64_t x80 = ((x75 + x55) + x78);
+ { uint64_t x81 = (x80 >> 0x34);
+ { uint64_t x82 = (x80 & 0xfffffffffffff);
+ { uint64_t x83 = (x58 + x78);
+ { uint64_t x84 = (x83 >> 0x34);
+ { uint64_t x85 = (x83 & 0xfffffffffffff);
+ out[0] = x85;
+ out[1] = (x84 + x64);
+ out[2] = x70;
+ out[3] = x76;
+ out[4] = x82;
+ out[5] = (x81 + x61);
+ out[6] = x67;
+ out[7] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e416m2e208m1/fesquare.c b/src/Specific/solinas64_2e416m2e208m1/fesquare.c
index 2cca42bdd..16272c033 100644
--- a/src/Specific/solinas64_2e416m2e208m1/fesquare.c
+++ b/src/Specific/solinas64_2e416m2e208m1/fesquare.c
@@ -1,83 +1,73 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
-{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
-{ uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
-{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
-{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
-{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
-{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
-{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
-{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
-{ uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
-{ uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
-{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
-{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
-{ uint64_t x29 = (uint64_t) (x25 >> 0x34);
-{ uint64_t x30 = ((uint64_t)x25 & 0xfffffffffffff);
-{ uint64_t x31 = (uint64_t) (x18 >> 0x34);
-{ uint64_t x32 = ((uint64_t)x18 & 0xfffffffffffff);
-{ uint128_t x33 = (((uint128_t)0x10000000000000 * x31) + x32);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x34);
-{ uint64_t x35 = ((uint64_t)x33 & 0xfffffffffffff);
-{ uint128_t x36 = ((x29 + x24) + x34);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x34);
-{ uint64_t x38 = ((uint64_t)x36 & 0xfffffffffffff);
-{ uint128_t x39 = (x28 + x34);
-{ uint64_t x40 = (uint64_t) (x39 >> 0x34);
-{ uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffff);
-{ uint128_t x42 = (x37 + x23);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x34);
-{ uint64_t x44 = ((uint64_t)x42 & 0xfffffffffffff);
-{ uint128_t x45 = (x40 + x27);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x34);
-{ uint64_t x47 = ((uint64_t)x45 & 0xfffffffffffff);
-{ uint128_t x48 = (x43 + x22);
-{ uint64_t x49 = (uint64_t) (x48 >> 0x34);
-{ uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffff);
-{ uint128_t x51 = (x46 + x26);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x34);
-{ uint64_t x53 = ((uint64_t)x51 & 0xfffffffffffff);
-{ uint64_t x54 = (x49 + x35);
-{ uint64_t x55 = (x54 >> 0x34);
-{ uint64_t x56 = (x54 & 0xfffffffffffff);
-{ uint64_t x57 = (x52 + x30);
-{ uint64_t x58 = (x57 >> 0x34);
-{ uint64_t x59 = (x57 & 0xfffffffffffff);
-{ uint64_t x60 = ((0x10000000000000 * x55) + x56);
-{ uint64_t x61 = (x60 >> 0x34);
-{ uint64_t x62 = (x60 & 0xfffffffffffff);
-{ uint64_t x63 = ((x58 + x38) + x61);
-{ uint64_t x64 = (x63 >> 0x34);
-{ uint64_t x65 = (x63 & 0xfffffffffffff);
-{ uint64_t x66 = (x41 + x61);
-{ uint64_t x67 = (x66 >> 0x34);
-{ uint64_t x68 = (x66 & 0xfffffffffffff);
-out[0] = x62;
-out[1] = x50;
-out[2] = x64 + x44;
-out[3] = x65;
-out[4] = x59;
-out[5] = x53;
-out[6] = x67 + x47;
-out[7] = x68;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
+ { uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+ { uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
+ { uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
+ { uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
+ { uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+ { uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+ { uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
+ { uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+ { uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
+ { uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
+ { uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+ { uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+ { uint64_t x29 = (uint64_t) (x25 >> 0x34);
+ { uint64_t x30 = ((uint64_t)x25 & 0xfffffffffffff);
+ { uint64_t x31 = (uint64_t) (x18 >> 0x34);
+ { uint64_t x32 = ((uint64_t)x18 & 0xfffffffffffff);
+ { uint128_t x33 = (((uint128_t)0x10000000000000 * x31) + x32);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x34);
+ { uint64_t x35 = ((uint64_t)x33 & 0xfffffffffffff);
+ { uint128_t x36 = ((x29 + x24) + x34);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x34);
+ { uint64_t x38 = ((uint64_t)x36 & 0xfffffffffffff);
+ { uint128_t x39 = (x28 + x34);
+ { uint64_t x40 = (uint64_t) (x39 >> 0x34);
+ { uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffff);
+ { uint128_t x42 = (x37 + x23);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x34);
+ { uint64_t x44 = ((uint64_t)x42 & 0xfffffffffffff);
+ { uint128_t x45 = (x40 + x27);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x34);
+ { uint64_t x47 = ((uint64_t)x45 & 0xfffffffffffff);
+ { uint128_t x48 = (x43 + x22);
+ { uint64_t x49 = (uint64_t) (x48 >> 0x34);
+ { uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffff);
+ { uint128_t x51 = (x46 + x26);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x34);
+ { uint64_t x53 = ((uint64_t)x51 & 0xfffffffffffff);
+ { uint64_t x54 = (x49 + x35);
+ { uint64_t x55 = (x54 >> 0x34);
+ { uint64_t x56 = (x54 & 0xfffffffffffff);
+ { uint64_t x57 = (x52 + x30);
+ { uint64_t x58 = (x57 >> 0x34);
+ { uint64_t x59 = (x57 & 0xfffffffffffff);
+ { uint64_t x60 = ((0x10000000000000 * x55) + x56);
+ { uint64_t x61 = (x60 >> 0x34);
+ { uint64_t x62 = (x60 & 0xfffffffffffff);
+ { uint64_t x63 = ((x58 + x38) + x61);
+ { uint64_t x64 = (x63 >> 0x34);
+ { uint64_t x65 = (x63 & 0xfffffffffffff);
+ { uint64_t x66 = (x41 + x61);
+ { uint64_t x67 = (x66 >> 0x34);
+ { uint64_t x68 = (x66 & 0xfffffffffffff);
+ out[0] = x68;
+ out[1] = (x67 + x47);
+ out[2] = x53;
+ out[3] = x59;
+ out[4] = x65;
+ out[5] = (x64 + x44);
+ out[6] = x50;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e416m2e208m1/freeze.c b/src/Specific/solinas64_2e416m2e208m1/freeze.c
index 14a01d292..f704b6f38 100644
--- a/src/Specific/solinas64_2e416m2e208m1/freeze.c
+++ b/src/Specific/solinas64_2e416m2e208m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xfffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xfffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xfffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffffe);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xfffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xfffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xfffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xfffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xfffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xfffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xfffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xffffffffffffe);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xfffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xfffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xfffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e444m17/femul.c b/src/Specific/solinas64_2e444m17/femul.c
index 729012826..5c0dca159 100644
--- a/src/Specific/solinas64_2e444m17/femul.c
+++ b/src/Specific/solinas64_2e444m17/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x11 * (0x2 * ((uint128_t)x16 * x30))));
-{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x11 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
-{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x11 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
-{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x11 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x11 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
-{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x11 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
-{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x11 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
-{ uint128_t x40 = (x39 >> 0x38);
-{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffffff);
-{ uint128_t x42 = (x40 + x38);
-{ uint128_t x43 = (x42 >> 0x37);
-{ uint64_t x44 = ((uint64_t)x42 & 0x7fffffffffffff);
-{ uint128_t x45 = (x43 + x37);
-{ uint128_t x46 = (x45 >> 0x38);
-{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
-{ uint128_t x48 = (x46 + x36);
-{ uint128_t x49 = (x48 >> 0x37);
-{ uint64_t x50 = ((uint64_t)x48 & 0x7fffffffffffff);
-{ uint128_t x51 = (x49 + x35);
-{ uint128_t x52 = (x51 >> 0x38);
-{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
-{ uint128_t x54 = (x52 + x34);
-{ uint128_t x55 = (x54 >> 0x37);
-{ uint64_t x56 = ((uint64_t)x54 & 0x7fffffffffffff);
-{ uint128_t x57 = (x55 + x33);
-{ uint64_t x58 = (uint64_t) (x57 >> 0x38);
-{ uint64_t x59 = ((uint64_t)x57 & 0xffffffffffffff);
-{ uint128_t x60 = (x58 + x32);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x37);
-{ uint64_t x62 = ((uint64_t)x60 & 0x7fffffffffffff);
-{ uint128_t x63 = (x41 + ((uint128_t)0x11 * x61));
-{ uint64_t x64 = (uint64_t) (x63 >> 0x38);
-{ uint64_t x65 = ((uint64_t)x63 & 0xffffffffffffff);
-{ uint64_t x66 = (x64 + x44);
-{ uint64_t x67 = (x66 >> 0x37);
-{ uint64_t x68 = (x66 & 0x7fffffffffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x11 * (0x2 * ((uint128_t)x16 * x30))));
+ { uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x11 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+ { uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x11 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
+ { uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x11 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x11 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
+ { uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x11 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+ { uint128_t x39 = (((uint128_t)x5 * x19) + (0x11 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
+ { uint128_t x40 = (x39 >> 0x38);
+ { uint64_t x41 = ((uint64_t)x39 & 0xffffffffffffff);
+ { uint128_t x42 = (x40 + x38);
+ { uint128_t x43 = (x42 >> 0x37);
+ { uint64_t x44 = ((uint64_t)x42 & 0x7fffffffffffff);
+ { uint128_t x45 = (x43 + x37);
+ { uint128_t x46 = (x45 >> 0x38);
+ { uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
+ { uint128_t x48 = (x46 + x36);
+ { uint128_t x49 = (x48 >> 0x37);
+ { uint64_t x50 = ((uint64_t)x48 & 0x7fffffffffffff);
+ { uint128_t x51 = (x49 + x35);
+ { uint128_t x52 = (x51 >> 0x38);
+ { uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
+ { uint128_t x54 = (x52 + x34);
+ { uint128_t x55 = (x54 >> 0x37);
+ { uint64_t x56 = ((uint64_t)x54 & 0x7fffffffffffff);
+ { uint128_t x57 = (x55 + x33);
+ { uint64_t x58 = (uint64_t) (x57 >> 0x38);
+ { uint64_t x59 = ((uint64_t)x57 & 0xffffffffffffff);
+ { uint128_t x60 = (x58 + x32);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x37);
+ { uint64_t x62 = ((uint64_t)x60 & 0x7fffffffffffff);
+ { uint128_t x63 = (x41 + ((uint128_t)0x11 * x61));
+ { uint64_t x64 = (uint64_t) (x63 >> 0x38);
+ { uint64_t x65 = ((uint64_t)x63 & 0xffffffffffffff);
+ { uint64_t x66 = (x64 + x44);
+ { uint64_t x67 = (x66 >> 0x37);
+ { uint64_t x68 = (x66 & 0x7fffffffffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e444m17/fesquare.c b/src/Specific/solinas64_2e444m17/fesquare.c
index 0633a574b..a07f1f07a 100644
--- a/src/Specific/solinas64_2e444m17/fesquare.c
+++ b/src/Specific/solinas64_2e444m17/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
-{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (0x2 * ((uint128_t)x13 * x13))));
-{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
-{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
-{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
-{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
-{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
-{ uint128_t x23 = (x22 >> 0x38);
-{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
-{ uint128_t x25 = (x23 + x21);
-{ uint128_t x26 = (x25 >> 0x37);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
-{ uint128_t x28 = (x26 + x20);
-{ uint128_t x29 = (x28 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
-{ uint128_t x31 = (x29 + x19);
-{ uint128_t x32 = (x31 >> 0x37);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7fffffffffffff);
-{ uint128_t x34 = (x32 + x18);
-{ uint128_t x35 = (x34 >> 0x38);
-{ uint64_t x36 = ((uint64_t)x34 & 0xffffffffffffff);
-{ uint128_t x37 = (x35 + x17);
-{ uint128_t x38 = (x37 >> 0x37);
-{ uint64_t x39 = ((uint64_t)x37 & 0x7fffffffffffff);
-{ uint128_t x40 = (x38 + x16);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x38);
-{ uint64_t x42 = ((uint64_t)x40 & 0xffffffffffffff);
-{ uint128_t x43 = (x41 + x15);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x37);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
-{ uint128_t x46 = (x24 + ((uint128_t)0x11 * x44));
-{ uint64_t x47 = (uint64_t) (x46 >> 0x38);
-{ uint64_t x48 = ((uint64_t)x46 & 0xffffffffffffff);
-{ uint64_t x49 = (x47 + x27);
-{ uint64_t x50 = (x49 >> 0x37);
-{ uint64_t x51 = (x49 & 0x7fffffffffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
+ { uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (0x2 * ((uint128_t)x13 * x13))));
+ { uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+ { uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
+ { uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
+ { uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+ { uint128_t x22 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
+ { uint128_t x23 = (x22 >> 0x38);
+ { uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+ { uint128_t x25 = (x23 + x21);
+ { uint128_t x26 = (x25 >> 0x37);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+ { uint128_t x28 = (x26 + x20);
+ { uint128_t x29 = (x28 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+ { uint128_t x31 = (x29 + x19);
+ { uint128_t x32 = (x31 >> 0x37);
+ { uint64_t x33 = ((uint64_t)x31 & 0x7fffffffffffff);
+ { uint128_t x34 = (x32 + x18);
+ { uint128_t x35 = (x34 >> 0x38);
+ { uint64_t x36 = ((uint64_t)x34 & 0xffffffffffffff);
+ { uint128_t x37 = (x35 + x17);
+ { uint128_t x38 = (x37 >> 0x37);
+ { uint64_t x39 = ((uint64_t)x37 & 0x7fffffffffffff);
+ { uint128_t x40 = (x38 + x16);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x38);
+ { uint64_t x42 = ((uint64_t)x40 & 0xffffffffffffff);
+ { uint128_t x43 = (x41 + x15);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x37);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
+ { uint128_t x46 = (x24 + ((uint128_t)0x11 * x44));
+ { uint64_t x47 = (uint64_t) (x46 >> 0x38);
+ { uint64_t x48 = ((uint64_t)x46 & 0xffffffffffffff);
+ { uint64_t x49 = (x47 + x27);
+ { uint64_t x50 = (x49 >> 0x37);
+ { uint64_t x51 = (x49 & 0x7fffffffffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e444m17/freeze.c b/src/Specific/solinas64_2e444m17/freeze.c
index 879618713..cbf2315ef 100644
--- a/src/Specific/solinas64_2e444m17/freeze.c
+++ b/src/Specific/solinas64_2e444m17/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffffef;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffffef);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0x7fffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0x7fffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0x7fffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0x7fffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xffffffffffffef);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0x7fffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xffffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0x7fffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xffffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0x7fffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xffffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0x7fffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e448m2e224m1/femul.c b/src/Specific/solinas64_2e448m2e224m1/femul.c
index 698c9e8ff..9dc18d44a 100644
--- a/src/Specific/solinas64_2e448m2e224m1/femul.c
+++ b/src/Specific/solinas64_2e448m2e224m1/femul.c
@@ -1,83 +1,81 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
-{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
-{ uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
-{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
-{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
-{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
-{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
-{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
-{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
-{ uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
-{ uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
-{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
-{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
-{ uint64_t x46 = (uint64_t) (x42 >> 0x38);
-{ uint64_t x47 = ((uint64_t)x42 & 0xffffffffffffff);
-{ uint64_t x48 = (uint64_t) (x35 >> 0x38);
-{ uint64_t x49 = ((uint64_t)x35 & 0xffffffffffffff);
-{ uint128_t x50 = (((uint128_t)0x100000000000000 * x48) + x49);
-{ uint64_t x51 = (uint64_t) (x50 >> 0x38);
-{ uint64_t x52 = ((uint64_t)x50 & 0xffffffffffffff);
-{ uint128_t x53 = ((x46 + x41) + x51);
-{ uint64_t x54 = (uint64_t) (x53 >> 0x38);
-{ uint64_t x55 = ((uint64_t)x53 & 0xffffffffffffff);
-{ uint128_t x56 = (x45 + x51);
-{ uint64_t x57 = (uint64_t) (x56 >> 0x38);
-{ uint64_t x58 = ((uint64_t)x56 & 0xffffffffffffff);
-{ uint128_t x59 = (x54 + x40);
-{ uint64_t x60 = (uint64_t) (x59 >> 0x38);
-{ uint64_t x61 = ((uint64_t)x59 & 0xffffffffffffff);
-{ uint128_t x62 = (x57 + x44);
-{ uint64_t x63 = (uint64_t) (x62 >> 0x38);
-{ uint64_t x64 = ((uint64_t)x62 & 0xffffffffffffff);
-{ uint128_t x65 = (x60 + x39);
-{ uint64_t x66 = (uint64_t) (x65 >> 0x38);
-{ uint64_t x67 = ((uint64_t)x65 & 0xffffffffffffff);
-{ uint128_t x68 = (x63 + x43);
-{ uint64_t x69 = (uint64_t) (x68 >> 0x38);
-{ uint64_t x70 = ((uint64_t)x68 & 0xffffffffffffff);
-{ uint64_t x71 = (x66 + x52);
-{ uint64_t x72 = (x71 >> 0x38);
-{ uint64_t x73 = (x71 & 0xffffffffffffff);
-{ uint64_t x74 = (x69 + x47);
-{ uint64_t x75 = (x74 >> 0x38);
-{ uint64_t x76 = (x74 & 0xffffffffffffff);
-{ uint64_t x77 = ((0x100000000000000 * x72) + x73);
-{ uint64_t x78 = (x77 >> 0x38);
-{ uint64_t x79 = (x77 & 0xffffffffffffff);
-{ uint64_t x80 = ((x75 + x55) + x78);
-{ uint64_t x81 = (x80 >> 0x38);
-{ uint64_t x82 = (x80 & 0xffffffffffffff);
-{ uint64_t x83 = (x58 + x78);
-{ uint64_t x84 = (x83 >> 0x38);
-{ uint64_t x85 = (x83 & 0xffffffffffffff);
-out[0] = x79;
-out[1] = x67;
-out[2] = x81 + x61;
-out[3] = x82;
-out[4] = x76;
-out[5] = x70;
-out[6] = x84 + x64;
-out[7] = x85;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
+ { uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+ { uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
+ { uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
+ { uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
+ { uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+ { uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+ { uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
+ { uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+ { uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
+ { uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
+ { uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+ { uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+ { uint64_t x46 = (uint64_t) (x42 >> 0x38);
+ { uint64_t x47 = ((uint64_t)x42 & 0xffffffffffffff);
+ { uint64_t x48 = (uint64_t) (x35 >> 0x38);
+ { uint64_t x49 = ((uint64_t)x35 & 0xffffffffffffff);
+ { uint128_t x50 = (((uint128_t)0x100000000000000 * x48) + x49);
+ { uint64_t x51 = (uint64_t) (x50 >> 0x38);
+ { uint64_t x52 = ((uint64_t)x50 & 0xffffffffffffff);
+ { uint128_t x53 = ((x46 + x41) + x51);
+ { uint64_t x54 = (uint64_t) (x53 >> 0x38);
+ { uint64_t x55 = ((uint64_t)x53 & 0xffffffffffffff);
+ { uint128_t x56 = (x45 + x51);
+ { uint64_t x57 = (uint64_t) (x56 >> 0x38);
+ { uint64_t x58 = ((uint64_t)x56 & 0xffffffffffffff);
+ { uint128_t x59 = (x54 + x40);
+ { uint64_t x60 = (uint64_t) (x59 >> 0x38);
+ { uint64_t x61 = ((uint64_t)x59 & 0xffffffffffffff);
+ { uint128_t x62 = (x57 + x44);
+ { uint64_t x63 = (uint64_t) (x62 >> 0x38);
+ { uint64_t x64 = ((uint64_t)x62 & 0xffffffffffffff);
+ { uint128_t x65 = (x60 + x39);
+ { uint64_t x66 = (uint64_t) (x65 >> 0x38);
+ { uint64_t x67 = ((uint64_t)x65 & 0xffffffffffffff);
+ { uint128_t x68 = (x63 + x43);
+ { uint64_t x69 = (uint64_t) (x68 >> 0x38);
+ { uint64_t x70 = ((uint64_t)x68 & 0xffffffffffffff);
+ { uint64_t x71 = (x66 + x52);
+ { uint64_t x72 = (x71 >> 0x38);
+ { uint64_t x73 = (x71 & 0xffffffffffffff);
+ { uint64_t x74 = (x69 + x47);
+ { uint64_t x75 = (x74 >> 0x38);
+ { uint64_t x76 = (x74 & 0xffffffffffffff);
+ { uint64_t x77 = ((0x100000000000000 * x72) + x73);
+ { uint64_t x78 = (x77 >> 0x38);
+ { uint64_t x79 = (x77 & 0xffffffffffffff);
+ { uint64_t x80 = ((x75 + x55) + x78);
+ { uint64_t x81 = (x80 >> 0x38);
+ { uint64_t x82 = (x80 & 0xffffffffffffff);
+ { uint64_t x83 = (x58 + x78);
+ { uint64_t x84 = (x83 >> 0x38);
+ { uint64_t x85 = (x83 & 0xffffffffffffff);
+ out[0] = x85;
+ out[1] = (x84 + x64);
+ out[2] = x70;
+ out[3] = x76;
+ out[4] = x82;
+ out[5] = (x81 + x61);
+ out[6] = x67;
+ out[7] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e448m2e224m1/fesquare.c b/src/Specific/solinas64_2e448m2e224m1/fesquare.c
index 0dfda7f19..cfb66f0aa 100644
--- a/src/Specific/solinas64_2e448m2e224m1/fesquare.c
+++ b/src/Specific/solinas64_2e448m2e224m1/fesquare.c
@@ -1,83 +1,73 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
-{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
-{ uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
-{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
-{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
-{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
-{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
-{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
-{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
-{ uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
-{ uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
-{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
-{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
-{ uint64_t x29 = (uint64_t) (x25 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x25 & 0xffffffffffffff);
-{ uint64_t x31 = (uint64_t) (x18 >> 0x38);
-{ uint64_t x32 = ((uint64_t)x18 & 0xffffffffffffff);
-{ uint128_t x33 = (((uint128_t)0x100000000000000 * x31) + x32);
-{ uint64_t x34 = (uint64_t) (x33 >> 0x38);
-{ uint64_t x35 = ((uint64_t)x33 & 0xffffffffffffff);
-{ uint128_t x36 = ((x29 + x24) + x34);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x38);
-{ uint64_t x38 = ((uint64_t)x36 & 0xffffffffffffff);
-{ uint128_t x39 = (x28 + x34);
-{ uint64_t x40 = (uint64_t) (x39 >> 0x38);
-{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffffff);
-{ uint128_t x42 = (x37 + x23);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x38);
-{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
-{ uint128_t x45 = (x40 + x27);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x38);
-{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
-{ uint128_t x48 = (x43 + x22);
-{ uint64_t x49 = (uint64_t) (x48 >> 0x38);
-{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
-{ uint128_t x51 = (x46 + x26);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x38);
-{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
-{ uint64_t x54 = (x49 + x35);
-{ uint64_t x55 = (x54 >> 0x38);
-{ uint64_t x56 = (x54 & 0xffffffffffffff);
-{ uint64_t x57 = (x52 + x30);
-{ uint64_t x58 = (x57 >> 0x38);
-{ uint64_t x59 = (x57 & 0xffffffffffffff);
-{ uint64_t x60 = ((0x100000000000000 * x55) + x56);
-{ uint64_t x61 = (x60 >> 0x38);
-{ uint64_t x62 = (x60 & 0xffffffffffffff);
-{ uint64_t x63 = ((x58 + x38) + x61);
-{ uint64_t x64 = (x63 >> 0x38);
-{ uint64_t x65 = (x63 & 0xffffffffffffff);
-{ uint64_t x66 = (x41 + x61);
-{ uint64_t x67 = (x66 >> 0x38);
-{ uint64_t x68 = (x66 & 0xffffffffffffff);
-out[0] = x62;
-out[1] = x50;
-out[2] = x64 + x44;
-out[3] = x65;
-out[4] = x59;
-out[5] = x53;
-out[6] = x67 + x47;
-out[7] = x68;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
+ { uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+ { uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
+ { uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
+ { uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
+ { uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+ { uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+ { uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
+ { uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+ { uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
+ { uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
+ { uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+ { uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+ { uint64_t x29 = (uint64_t) (x25 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x25 & 0xffffffffffffff);
+ { uint64_t x31 = (uint64_t) (x18 >> 0x38);
+ { uint64_t x32 = ((uint64_t)x18 & 0xffffffffffffff);
+ { uint128_t x33 = (((uint128_t)0x100000000000000 * x31) + x32);
+ { uint64_t x34 = (uint64_t) (x33 >> 0x38);
+ { uint64_t x35 = ((uint64_t)x33 & 0xffffffffffffff);
+ { uint128_t x36 = ((x29 + x24) + x34);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x38);
+ { uint64_t x38 = ((uint64_t)x36 & 0xffffffffffffff);
+ { uint128_t x39 = (x28 + x34);
+ { uint64_t x40 = (uint64_t) (x39 >> 0x38);
+ { uint64_t x41 = ((uint64_t)x39 & 0xffffffffffffff);
+ { uint128_t x42 = (x37 + x23);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x38);
+ { uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
+ { uint128_t x45 = (x40 + x27);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x38);
+ { uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
+ { uint128_t x48 = (x43 + x22);
+ { uint64_t x49 = (uint64_t) (x48 >> 0x38);
+ { uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
+ { uint128_t x51 = (x46 + x26);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x38);
+ { uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
+ { uint64_t x54 = (x49 + x35);
+ { uint64_t x55 = (x54 >> 0x38);
+ { uint64_t x56 = (x54 & 0xffffffffffffff);
+ { uint64_t x57 = (x52 + x30);
+ { uint64_t x58 = (x57 >> 0x38);
+ { uint64_t x59 = (x57 & 0xffffffffffffff);
+ { uint64_t x60 = ((0x100000000000000 * x55) + x56);
+ { uint64_t x61 = (x60 >> 0x38);
+ { uint64_t x62 = (x60 & 0xffffffffffffff);
+ { uint64_t x63 = ((x58 + x38) + x61);
+ { uint64_t x64 = (x63 >> 0x38);
+ { uint64_t x65 = (x63 & 0xffffffffffffff);
+ { uint64_t x66 = (x41 + x61);
+ { uint64_t x67 = (x66 >> 0x38);
+ { uint64_t x68 = (x66 & 0xffffffffffffff);
+ out[0] = x68;
+ out[1] = (x67 + x47);
+ out[2] = x53;
+ out[3] = x59;
+ out[4] = x65;
+ out[5] = (x64 + x44);
+ out[6] = x50;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e448m2e224m1/freeze.c b/src/Specific/solinas64_2e448m2e224m1/freeze.c
index ffc6a3184..37fc5f7d3 100644
--- a/src/Specific/solinas64_2e448m2e224m1/freeze.c
+++ b/src/Specific/solinas64_2e448m2e224m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xfffffffffffffe);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xffffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xffffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xffffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xffffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xfffffffffffffe);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xffffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xffffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xffffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e450m2e225m1/femul.c b/src/Specific/solinas64_2e450m2e225m1/femul.c
index 43d167528..5412ab232 100644
--- a/src/Specific/solinas64_2e450m2e225m1/femul.c
+++ b/src/Specific/solinas64_2e450m2e225m1/femul.c
@@ -1,83 +1,81 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
-{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
-{ uint128_t x34 = (((0x2 * ((uint128_t)(x7 + x15) * (x25 + x30))) + ((0x2 * ((uint128_t)(x9 + x17) * (x23 + x31))) + (0x2 * ((uint128_t)(x11 + x16) * (x21 + x29))))) - ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + (0x2 * ((uint128_t)x11 * x21)))));
-{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + ((0x2 * ((uint128_t)(x7 + x15) * (x23 + x31))) + ((0x2 * ((uint128_t)(x9 + x17) * (x21 + x29))) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))));
-{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + ((0x2 * ((uint128_t)(x7 + x15) * (x21 + x29))) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))));
-{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
-{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
-{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
-{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
-{ uint128_t x41 = (((((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + (0x2 * ((uint128_t)x11 * x21)))) + ((0x2 * ((uint128_t)x15 * x30)) + ((0x2 * ((uint128_t)x17 * x31)) + (0x2 * ((uint128_t)x16 * x29))))) + x38) + x34);
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((uint128_t)x16 * x27)))));
-{ uint128_t x43 = (((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + ((uint128_t)x17 * x27)))) + x32);
-{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
-{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
-{ uint64_t x46 = (uint64_t) (x42 >> 0x38);
-{ uint64_t x47 = ((uint64_t)x42 & 0xffffffffffffff);
-{ uint128_t x48 = (x35 >> 0x38);
-{ uint64_t x49 = ((uint64_t)x35 & 0xffffffffffffff);
-{ uint128_t x50 = ((0x100000000000000 * x48) + x49);
-{ uint128_t x51 = (x50 >> 0x38);
-{ uint64_t x52 = ((uint64_t)x50 & 0xffffffffffffff);
-{ uint128_t x53 = ((x46 + x41) + x51);
-{ uint64_t x54 = (uint64_t) (x53 >> 0x39);
-{ uint64_t x55 = ((uint64_t)x53 & 0x1ffffffffffffff);
-{ uint128_t x56 = (x45 + x51);
-{ uint64_t x57 = (uint64_t) (x56 >> 0x39);
-{ uint64_t x58 = ((uint64_t)x56 & 0x1ffffffffffffff);
-{ uint128_t x59 = (x54 + x40);
-{ uint64_t x60 = (uint64_t) (x59 >> 0x38);
-{ uint64_t x61 = ((uint64_t)x59 & 0xffffffffffffff);
-{ uint128_t x62 = (x57 + x44);
-{ uint64_t x63 = (uint64_t) (x62 >> 0x38);
-{ uint64_t x64 = ((uint64_t)x62 & 0xffffffffffffff);
-{ uint128_t x65 = (x60 + x39);
-{ uint128_t x66 = (x65 >> 0x38);
-{ uint64_t x67 = ((uint64_t)x65 & 0xffffffffffffff);
-{ uint128_t x68 = (x63 + x43);
-{ uint64_t x69 = (uint64_t) (x68 >> 0x38);
-{ uint64_t x70 = ((uint64_t)x68 & 0xffffffffffffff);
-{ uint128_t x71 = (x66 + x52);
-{ uint64_t x72 = (uint64_t) (x71 >> 0x38);
-{ uint64_t x73 = ((uint64_t)x71 & 0xffffffffffffff);
-{ uint64_t x74 = (x69 + x47);
-{ uint64_t x75 = (x74 >> 0x38);
-{ uint64_t x76 = (x74 & 0xffffffffffffff);
-{ uint128_t x77 = (((uint128_t)0x100000000000000 * x72) + x73);
-{ uint64_t x78 = (uint64_t) (x77 >> 0x38);
-{ uint64_t x79 = ((uint64_t)x77 & 0xffffffffffffff);
-{ uint64_t x80 = ((x75 + x55) + x78);
-{ uint64_t x81 = (x80 >> 0x39);
-{ uint64_t x82 = (x80 & 0x1ffffffffffffff);
-{ uint64_t x83 = (x58 + x78);
-{ uint64_t x84 = (x83 >> 0x39);
-{ uint64_t x85 = (x83 & 0x1ffffffffffffff);
-out[0] = x79;
-out[1] = x67;
-out[2] = x81 + x61;
-out[3] = x82;
-out[4] = x76;
-out[5] = x70;
-out[6] = x84 + x64;
-out[7] = x85;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
+ { uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+ { uint128_t x34 = (((0x2 * ((uint128_t)(x7 + x15) * (x25 + x30))) + ((0x2 * ((uint128_t)(x9 + x17) * (x23 + x31))) + (0x2 * ((uint128_t)(x11 + x16) * (x21 + x29))))) - ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + (0x2 * ((uint128_t)x11 * x21)))));
+ { uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + ((0x2 * ((uint128_t)(x7 + x15) * (x23 + x31))) + ((0x2 * ((uint128_t)(x9 + x17) * (x21 + x29))) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))));
+ { uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + ((0x2 * ((uint128_t)(x7 + x15) * (x21 + x29))) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))));
+ { uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+ { uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+ { uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
+ { uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+ { uint128_t x41 = (((((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + (0x2 * ((uint128_t)x11 * x21)))) + ((0x2 * ((uint128_t)x15 * x30)) + ((0x2 * ((uint128_t)x17 * x31)) + (0x2 * ((uint128_t)x16 * x29))))) + x38) + x34);
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((uint128_t)x16 * x27)))));
+ { uint128_t x43 = (((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + ((uint128_t)x17 * x27)))) + x32);
+ { uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+ { uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+ { uint64_t x46 = (uint64_t) (x42 >> 0x38);
+ { uint64_t x47 = ((uint64_t)x42 & 0xffffffffffffff);
+ { uint128_t x48 = (x35 >> 0x38);
+ { uint64_t x49 = ((uint64_t)x35 & 0xffffffffffffff);
+ { uint128_t x50 = ((0x100000000000000 * x48) + x49);
+ { uint128_t x51 = (x50 >> 0x38);
+ { uint64_t x52 = ((uint64_t)x50 & 0xffffffffffffff);
+ { uint128_t x53 = ((x46 + x41) + x51);
+ { uint64_t x54 = (uint64_t) (x53 >> 0x39);
+ { uint64_t x55 = ((uint64_t)x53 & 0x1ffffffffffffff);
+ { uint128_t x56 = (x45 + x51);
+ { uint64_t x57 = (uint64_t) (x56 >> 0x39);
+ { uint64_t x58 = ((uint64_t)x56 & 0x1ffffffffffffff);
+ { uint128_t x59 = (x54 + x40);
+ { uint64_t x60 = (uint64_t) (x59 >> 0x38);
+ { uint64_t x61 = ((uint64_t)x59 & 0xffffffffffffff);
+ { uint128_t x62 = (x57 + x44);
+ { uint64_t x63 = (uint64_t) (x62 >> 0x38);
+ { uint64_t x64 = ((uint64_t)x62 & 0xffffffffffffff);
+ { uint128_t x65 = (x60 + x39);
+ { uint128_t x66 = (x65 >> 0x38);
+ { uint64_t x67 = ((uint64_t)x65 & 0xffffffffffffff);
+ { uint128_t x68 = (x63 + x43);
+ { uint64_t x69 = (uint64_t) (x68 >> 0x38);
+ { uint64_t x70 = ((uint64_t)x68 & 0xffffffffffffff);
+ { uint128_t x71 = (x66 + x52);
+ { uint64_t x72 = (uint64_t) (x71 >> 0x38);
+ { uint64_t x73 = ((uint64_t)x71 & 0xffffffffffffff);
+ { uint64_t x74 = (x69 + x47);
+ { uint64_t x75 = (x74 >> 0x38);
+ { uint64_t x76 = (x74 & 0xffffffffffffff);
+ { uint128_t x77 = (((uint128_t)0x100000000000000 * x72) + x73);
+ { uint64_t x78 = (uint64_t) (x77 >> 0x38);
+ { uint64_t x79 = ((uint64_t)x77 & 0xffffffffffffff);
+ { uint64_t x80 = ((x75 + x55) + x78);
+ { uint64_t x81 = (x80 >> 0x39);
+ { uint64_t x82 = (x80 & 0x1ffffffffffffff);
+ { uint64_t x83 = (x58 + x78);
+ { uint64_t x84 = (x83 >> 0x39);
+ { uint64_t x85 = (x83 & 0x1ffffffffffffff);
+ out[0] = x85;
+ out[1] = (x84 + x64);
+ out[2] = x70;
+ out[3] = x76;
+ out[4] = x82;
+ out[5] = (x81 + x61);
+ out[6] = x67;
+ out[7] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e450m2e225m1/fesquare.c b/src/Specific/solinas64_2e450m2e225m1/fesquare.c
index f83caa2bf..4b85e2ea5 100644
--- a/src/Specific/solinas64_2e450m2e225m1/fesquare.c
+++ b/src/Specific/solinas64_2e450m2e225m1/fesquare.c
@@ -1,83 +1,73 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
-{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
-{ uint128_t x17 = (((0x2 * ((uint128_t)(x4 + x12) * (x8 + x13))) + ((0x2 * ((uint128_t)(x6 + x14) * (x6 + x14))) + (0x2 * ((uint128_t)(x8 + x13) * (x4 + x12))))) - ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x8 * x4)))));
-{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + ((0x2 * ((uint128_t)(x4 + x12) * (x6 + x14))) + ((0x2 * ((uint128_t)(x6 + x14) * (x4 + x12))) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))));
-{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + ((0x2 * ((uint128_t)(x4 + x12) * (x4 + x12))) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))));
-{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
-{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
-{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
-{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
-{ uint128_t x24 = (((((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x8 * x4)))) + ((0x2 * ((uint128_t)x12 * x13)) + ((0x2 * ((uint128_t)x14 * x14)) + (0x2 * ((uint128_t)x13 * x12))))) + x21) + x17);
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + ((0x2 * ((uint128_t)x12 * x14)) + ((0x2 * ((uint128_t)x14 * x12)) + ((uint128_t)x13 * x10)))));
-{ uint128_t x26 = (((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + ((uint128_t)x14 * x10)))) + x15);
-{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
-{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
-{ uint64_t x29 = (uint64_t) (x25 >> 0x38);
-{ uint64_t x30 = ((uint64_t)x25 & 0xffffffffffffff);
-{ uint128_t x31 = (x18 >> 0x38);
-{ uint64_t x32 = ((uint64_t)x18 & 0xffffffffffffff);
-{ uint128_t x33 = ((0x100000000000000 * x31) + x32);
-{ uint128_t x34 = (x33 >> 0x38);
-{ uint64_t x35 = ((uint64_t)x33 & 0xffffffffffffff);
-{ uint128_t x36 = ((x29 + x24) + x34);
-{ uint64_t x37 = (uint64_t) (x36 >> 0x39);
-{ uint64_t x38 = ((uint64_t)x36 & 0x1ffffffffffffff);
-{ uint128_t x39 = (x28 + x34);
-{ uint64_t x40 = (uint64_t) (x39 >> 0x39);
-{ uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
-{ uint128_t x42 = (x37 + x23);
-{ uint64_t x43 = (uint64_t) (x42 >> 0x38);
-{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
-{ uint128_t x45 = (x40 + x27);
-{ uint64_t x46 = (uint64_t) (x45 >> 0x38);
-{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
-{ uint128_t x48 = (x43 + x22);
-{ uint128_t x49 = (x48 >> 0x38);
-{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
-{ uint128_t x51 = (x46 + x26);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x38);
-{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
-{ uint128_t x54 = (x49 + x35);
-{ uint64_t x55 = (uint64_t) (x54 >> 0x38);
-{ uint64_t x56 = ((uint64_t)x54 & 0xffffffffffffff);
-{ uint64_t x57 = (x52 + x30);
-{ uint64_t x58 = (x57 >> 0x38);
-{ uint64_t x59 = (x57 & 0xffffffffffffff);
-{ uint128_t x60 = (((uint128_t)0x100000000000000 * x55) + x56);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x38);
-{ uint64_t x62 = ((uint64_t)x60 & 0xffffffffffffff);
-{ uint64_t x63 = ((x58 + x38) + x61);
-{ uint64_t x64 = (x63 >> 0x39);
-{ uint64_t x65 = (x63 & 0x1ffffffffffffff);
-{ uint64_t x66 = (x41 + x61);
-{ uint64_t x67 = (x66 >> 0x39);
-{ uint64_t x68 = (x66 & 0x1ffffffffffffff);
-out[0] = x62;
-out[1] = x50;
-out[2] = x64 + x44;
-out[3] = x65;
-out[4] = x59;
-out[5] = x53;
-out[6] = x67 + x47;
-out[7] = x68;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
+ { uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+ { uint128_t x17 = (((0x2 * ((uint128_t)(x4 + x12) * (x8 + x13))) + ((0x2 * ((uint128_t)(x6 + x14) * (x6 + x14))) + (0x2 * ((uint128_t)(x8 + x13) * (x4 + x12))))) - ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x8 * x4)))));
+ { uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + ((0x2 * ((uint128_t)(x4 + x12) * (x6 + x14))) + ((0x2 * ((uint128_t)(x6 + x14) * (x4 + x12))) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))));
+ { uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + ((0x2 * ((uint128_t)(x4 + x12) * (x4 + x12))) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))));
+ { uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+ { uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+ { uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
+ { uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+ { uint128_t x24 = (((((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x8 * x4)))) + ((0x2 * ((uint128_t)x12 * x13)) + ((0x2 * ((uint128_t)x14 * x14)) + (0x2 * ((uint128_t)x13 * x12))))) + x21) + x17);
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + ((0x2 * ((uint128_t)x12 * x14)) + ((0x2 * ((uint128_t)x14 * x12)) + ((uint128_t)x13 * x10)))));
+ { uint128_t x26 = (((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + ((uint128_t)x14 * x10)))) + x15);
+ { uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+ { uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+ { uint64_t x29 = (uint64_t) (x25 >> 0x38);
+ { uint64_t x30 = ((uint64_t)x25 & 0xffffffffffffff);
+ { uint128_t x31 = (x18 >> 0x38);
+ { uint64_t x32 = ((uint64_t)x18 & 0xffffffffffffff);
+ { uint128_t x33 = ((0x100000000000000 * x31) + x32);
+ { uint128_t x34 = (x33 >> 0x38);
+ { uint64_t x35 = ((uint64_t)x33 & 0xffffffffffffff);
+ { uint128_t x36 = ((x29 + x24) + x34);
+ { uint64_t x37 = (uint64_t) (x36 >> 0x39);
+ { uint64_t x38 = ((uint64_t)x36 & 0x1ffffffffffffff);
+ { uint128_t x39 = (x28 + x34);
+ { uint64_t x40 = (uint64_t) (x39 >> 0x39);
+ { uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
+ { uint128_t x42 = (x37 + x23);
+ { uint64_t x43 = (uint64_t) (x42 >> 0x38);
+ { uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
+ { uint128_t x45 = (x40 + x27);
+ { uint64_t x46 = (uint64_t) (x45 >> 0x38);
+ { uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
+ { uint128_t x48 = (x43 + x22);
+ { uint128_t x49 = (x48 >> 0x38);
+ { uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
+ { uint128_t x51 = (x46 + x26);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x38);
+ { uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
+ { uint128_t x54 = (x49 + x35);
+ { uint64_t x55 = (uint64_t) (x54 >> 0x38);
+ { uint64_t x56 = ((uint64_t)x54 & 0xffffffffffffff);
+ { uint64_t x57 = (x52 + x30);
+ { uint64_t x58 = (x57 >> 0x38);
+ { uint64_t x59 = (x57 & 0xffffffffffffff);
+ { uint128_t x60 = (((uint128_t)0x100000000000000 * x55) + x56);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x38);
+ { uint64_t x62 = ((uint64_t)x60 & 0xffffffffffffff);
+ { uint64_t x63 = ((x58 + x38) + x61);
+ { uint64_t x64 = (x63 >> 0x39);
+ { uint64_t x65 = (x63 & 0x1ffffffffffffff);
+ { uint64_t x66 = (x41 + x61);
+ { uint64_t x67 = (x66 >> 0x39);
+ { uint64_t x68 = (x66 & 0x1ffffffffffffff);
+ out[0] = x68;
+ out[1] = (x67 + x47);
+ out[2] = x53;
+ out[3] = x59;
+ out[4] = x65;
+ out[5] = (x64 + x44);
+ out[6] = x50;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e450m2e225m1/freeze.c b/src/Specific/solinas64_2e450m2e225m1/freeze.c
index ab3776151..22814aaca 100644
--- a/src/Specific/solinas64_2e450m2e225m1/freeze.c
+++ b/src/Specific/solinas64_2e450m2e225m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1ffffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xffffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0x1fffffffffffffe);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xffffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0x1ffffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xffffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xffffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xffffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0x1fffffffffffffe);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xffffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xffffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xffffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e452m3/femul.c b/src/Specific/solinas64_2e452m3/femul.c
index ac501cb79..f36a466ea 100644
--- a/src/Specific/solinas64_2e452m3/femul.c
+++ b/src/Specific/solinas64_2e452m3/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x3 * (0x2 * ((uint128_t)x16 * x30))));
-{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x3 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
-{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x3 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
-{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x3 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x3 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
-{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x3 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
-{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x3 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
-{ uint128_t x40 = (x39 >> 0x39);
-{ uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
-{ uint128_t x42 = (x40 + x38);
-{ uint128_t x43 = (x42 >> 0x38);
-{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
-{ uint128_t x45 = (x43 + x37);
-{ uint128_t x46 = (x45 >> 0x39);
-{ uint64_t x47 = ((uint64_t)x45 & 0x1ffffffffffffff);
-{ uint128_t x48 = (x46 + x36);
-{ uint128_t x49 = (x48 >> 0x38);
-{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
-{ uint128_t x51 = (x49 + x35);
-{ uint64_t x52 = (uint64_t) (x51 >> 0x39);
-{ uint64_t x53 = ((uint64_t)x51 & 0x1ffffffffffffff);
-{ uint128_t x54 = (x52 + x34);
-{ uint128_t x55 = (x54 >> 0x38);
-{ uint64_t x56 = ((uint64_t)x54 & 0xffffffffffffff);
-{ uint128_t x57 = (x55 + x33);
-{ uint64_t x58 = (uint64_t) (x57 >> 0x39);
-{ uint64_t x59 = ((uint64_t)x57 & 0x1ffffffffffffff);
-{ uint128_t x60 = (x58 + x32);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x38);
-{ uint64_t x62 = ((uint64_t)x60 & 0xffffffffffffff);
-{ uint128_t x63 = (x41 + ((uint128_t)0x3 * x61));
-{ uint64_t x64 = (uint64_t) (x63 >> 0x39);
-{ uint64_t x65 = ((uint64_t)x63 & 0x1ffffffffffffff);
-{ uint64_t x66 = (x64 + x44);
-{ uint64_t x67 = (x66 >> 0x38);
-{ uint64_t x68 = (x66 & 0xffffffffffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x3 * (0x2 * ((uint128_t)x16 * x30))));
+ { uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x3 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+ { uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x3 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
+ { uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x3 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x3 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
+ { uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x3 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+ { uint128_t x39 = (((uint128_t)x5 * x19) + (0x3 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
+ { uint128_t x40 = (x39 >> 0x39);
+ { uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
+ { uint128_t x42 = (x40 + x38);
+ { uint128_t x43 = (x42 >> 0x38);
+ { uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
+ { uint128_t x45 = (x43 + x37);
+ { uint128_t x46 = (x45 >> 0x39);
+ { uint64_t x47 = ((uint64_t)x45 & 0x1ffffffffffffff);
+ { uint128_t x48 = (x46 + x36);
+ { uint128_t x49 = (x48 >> 0x38);
+ { uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
+ { uint128_t x51 = (x49 + x35);
+ { uint64_t x52 = (uint64_t) (x51 >> 0x39);
+ { uint64_t x53 = ((uint64_t)x51 & 0x1ffffffffffffff);
+ { uint128_t x54 = (x52 + x34);
+ { uint128_t x55 = (x54 >> 0x38);
+ { uint64_t x56 = ((uint64_t)x54 & 0xffffffffffffff);
+ { uint128_t x57 = (x55 + x33);
+ { uint64_t x58 = (uint64_t) (x57 >> 0x39);
+ { uint64_t x59 = ((uint64_t)x57 & 0x1ffffffffffffff);
+ { uint128_t x60 = (x58 + x32);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x38);
+ { uint64_t x62 = ((uint64_t)x60 & 0xffffffffffffff);
+ { uint128_t x63 = (x41 + ((uint128_t)0x3 * x61));
+ { uint64_t x64 = (uint64_t) (x63 >> 0x39);
+ { uint64_t x65 = ((uint64_t)x63 & 0x1ffffffffffffff);
+ { uint64_t x66 = (x64 + x44);
+ { uint64_t x67 = (x66 >> 0x38);
+ { uint64_t x68 = (x66 & 0xffffffffffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e452m3/fesquare.c b/src/Specific/solinas64_2e452m3/fesquare.c
index 7b57e86be..18c217c71 100644
--- a/src/Specific/solinas64_2e452m3/fesquare.c
+++ b/src/Specific/solinas64_2e452m3/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
-{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x3 * (0x2 * ((uint128_t)x13 * x13))));
-{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x3 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
-{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x3 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
-{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x3 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
-{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
-{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
-{ uint128_t x23 = (x22 >> 0x39);
-{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
-{ uint128_t x25 = (x23 + x21);
-{ uint128_t x26 = (x25 >> 0x38);
-{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
-{ uint128_t x28 = (x26 + x20);
-{ uint128_t x29 = (x28 >> 0x39);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
-{ uint128_t x31 = (x29 + x19);
-{ uint128_t x32 = (x31 >> 0x38);
-{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
-{ uint128_t x34 = (x32 + x18);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x39);
-{ uint64_t x36 = ((uint64_t)x34 & 0x1ffffffffffffff);
-{ uint128_t x37 = (x35 + x17);
-{ uint128_t x38 = (x37 >> 0x38);
-{ uint64_t x39 = ((uint64_t)x37 & 0xffffffffffffff);
-{ uint128_t x40 = (x38 + x16);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x39);
-{ uint64_t x42 = ((uint64_t)x40 & 0x1ffffffffffffff);
-{ uint128_t x43 = (x41 + x15);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x38);
-{ uint64_t x45 = ((uint64_t)x43 & 0xffffffffffffff);
-{ uint128_t x46 = (x24 + ((uint128_t)0x3 * x44));
-{ uint64_t x47 = (uint64_t) (x46 >> 0x39);
-{ uint64_t x48 = ((uint64_t)x46 & 0x1ffffffffffffff);
-{ uint64_t x49 = (x47 + x27);
-{ uint64_t x50 = (x49 >> 0x38);
-{ uint64_t x51 = (x49 & 0xffffffffffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
+ { uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x3 * (0x2 * ((uint128_t)x13 * x13))));
+ { uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x3 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+ { uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x3 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
+ { uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x3 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
+ { uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+ { uint128_t x22 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
+ { uint128_t x23 = (x22 >> 0x39);
+ { uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+ { uint128_t x25 = (x23 + x21);
+ { uint128_t x26 = (x25 >> 0x38);
+ { uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+ { uint128_t x28 = (x26 + x20);
+ { uint128_t x29 = (x28 >> 0x39);
+ { uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
+ { uint128_t x31 = (x29 + x19);
+ { uint128_t x32 = (x31 >> 0x38);
+ { uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+ { uint128_t x34 = (x32 + x18);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x39);
+ { uint64_t x36 = ((uint64_t)x34 & 0x1ffffffffffffff);
+ { uint128_t x37 = (x35 + x17);
+ { uint128_t x38 = (x37 >> 0x38);
+ { uint64_t x39 = ((uint64_t)x37 & 0xffffffffffffff);
+ { uint128_t x40 = (x38 + x16);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x39);
+ { uint64_t x42 = ((uint64_t)x40 & 0x1ffffffffffffff);
+ { uint128_t x43 = (x41 + x15);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x38);
+ { uint64_t x45 = ((uint64_t)x43 & 0xffffffffffffff);
+ { uint128_t x46 = (x24 + ((uint128_t)0x3 * x44));
+ { uint64_t x47 = (uint64_t) (x46 >> 0x39);
+ { uint64_t x48 = ((uint64_t)x46 & 0x1ffffffffffffff);
+ { uint64_t x49 = (x47 + x27);
+ { uint64_t x50 = (x49 >> 0x38);
+ { uint64_t x51 = (x49 & 0xffffffffffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e452m3/freeze.c b/src/Specific/solinas64_2e452m3/freeze.c
index 3901344f6..f54bffc77 100644
--- a/src/Specific/solinas64_2e452m3/freeze.c
+++ b/src/Specific/solinas64_2e452m3/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffffffd;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffffffd);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0x1ffffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xffffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0x1ffffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xffffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0x1ffffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xffffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0x1fffffffffffffd);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xffffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0x1ffffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xffffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0x1ffffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xffffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0x1ffffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 57 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xffffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 56 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e468m17/femul.c b/src/Specific/solinas64_2e468m17/femul.c
index ed57de1b6..577cfa76f 100644
--- a/src/Specific/solinas64_2e468m17/femul.c
+++ b/src/Specific/solinas64_2e468m17/femul.c
@@ -1,66 +1,64 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
-{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x11 * (0x2 * ((uint128_t)x16 * x30))));
-{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x11 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
-{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x11 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
-{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x11 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x11 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
-{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x11 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
-{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x11 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
-{ uint128_t x40 = (x39 >> 0x3b);
-{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffffff);
-{ uint128_t x42 = (x40 + x38);
-{ uint128_t x43 = (x42 >> 0x3a);
-{ uint64_t x44 = ((uint64_t)x42 & 0x3ffffffffffffff);
-{ uint128_t x45 = (x43 + x37);
-{ uint128_t x46 = (x45 >> 0x3b);
-{ uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffffff);
-{ uint128_t x48 = (x46 + x36);
-{ uint128_t x49 = (x48 >> 0x3a);
-{ uint64_t x50 = ((uint64_t)x48 & 0x3ffffffffffffff);
-{ uint128_t x51 = (x49 + x35);
-{ uint128_t x52 = (x51 >> 0x3b);
-{ uint64_t x53 = ((uint64_t)x51 & 0x7ffffffffffffff);
-{ uint128_t x54 = (x52 + x34);
-{ uint128_t x55 = (x54 >> 0x3a);
-{ uint64_t x56 = ((uint64_t)x54 & 0x3ffffffffffffff);
-{ uint128_t x57 = (x55 + x33);
-{ uint128_t x58 = (x57 >> 0x3b);
-{ uint64_t x59 = ((uint64_t)x57 & 0x7ffffffffffffff);
-{ uint128_t x60 = (x58 + x32);
-{ uint128_t x61 = (x60 >> 0x3a);
-{ uint64_t x62 = ((uint64_t)x60 & 0x3ffffffffffffff);
-{ uint128_t x63 = (x41 + (0x11 * x61));
-{ uint64_t x64 = (uint64_t) (x63 >> 0x3b);
-{ uint64_t x65 = ((uint64_t)x63 & 0x7ffffffffffffff);
-{ uint64_t x66 = (x64 + x44);
-{ uint64_t x67 = (x66 >> 0x3a);
-{ uint64_t x68 = (x66 & 0x3ffffffffffffff);
-out[0] = x62;
-out[1] = x59;
-out[2] = x56;
-out[3] = x53;
-out[4] = x50;
-out[5] = x67 + x47;
-out[6] = x68;
-out[7] = x65;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19))))))));
+ { uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x11 * (0x2 * ((uint128_t)x16 * x30))));
+ { uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x11 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+ { uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x11 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
+ { uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x11 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x11 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
+ { uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x11 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+ { uint128_t x39 = (((uint128_t)x5 * x19) + (0x11 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
+ { uint128_t x40 = (x39 >> 0x3b);
+ { uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffffff);
+ { uint128_t x42 = (x40 + x38);
+ { uint128_t x43 = (x42 >> 0x3a);
+ { uint64_t x44 = ((uint64_t)x42 & 0x3ffffffffffffff);
+ { uint128_t x45 = (x43 + x37);
+ { uint128_t x46 = (x45 >> 0x3b);
+ { uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffffff);
+ { uint128_t x48 = (x46 + x36);
+ { uint128_t x49 = (x48 >> 0x3a);
+ { uint64_t x50 = ((uint64_t)x48 & 0x3ffffffffffffff);
+ { uint128_t x51 = (x49 + x35);
+ { uint128_t x52 = (x51 >> 0x3b);
+ { uint64_t x53 = ((uint64_t)x51 & 0x7ffffffffffffff);
+ { uint128_t x54 = (x52 + x34);
+ { uint128_t x55 = (x54 >> 0x3a);
+ { uint64_t x56 = ((uint64_t)x54 & 0x3ffffffffffffff);
+ { uint128_t x57 = (x55 + x33);
+ { uint128_t x58 = (x57 >> 0x3b);
+ { uint64_t x59 = ((uint64_t)x57 & 0x7ffffffffffffff);
+ { uint128_t x60 = (x58 + x32);
+ { uint128_t x61 = (x60 >> 0x3a);
+ { uint64_t x62 = ((uint64_t)x60 & 0x3ffffffffffffff);
+ { uint128_t x63 = (x41 + (0x11 * x61));
+ { uint64_t x64 = (uint64_t) (x63 >> 0x3b);
+ { uint64_t x65 = ((uint64_t)x63 & 0x7ffffffffffffff);
+ { uint64_t x66 = (x64 + x44);
+ { uint64_t x67 = (x66 >> 0x3a);
+ { uint64_t x68 = (x66 & 0x3ffffffffffffff);
+ out[0] = x65;
+ out[1] = x68;
+ out[2] = (x67 + x47);
+ out[3] = x50;
+ out[4] = x53;
+ out[5] = x56;
+ out[6] = x59;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e468m17/fesquare.c b/src/Specific/solinas64_2e468m17/fesquare.c
index 8394de5b0..edce105d8 100644
--- a/src/Specific/solinas64_2e468m17/fesquare.c
+++ b/src/Specific/solinas64_2e468m17/fesquare.c
@@ -1,66 +1,56 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
-{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (0x2 * ((uint128_t)x13 * x13))));
-{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
-{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
-{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
-{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
-{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
-{ uint128_t x23 = (x22 >> 0x3b);
-{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffffff);
-{ uint128_t x25 = (x23 + x21);
-{ uint128_t x26 = (x25 >> 0x3a);
-{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
-{ uint128_t x28 = (x26 + x20);
-{ uint128_t x29 = (x28 >> 0x3b);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
-{ uint128_t x31 = (x29 + x19);
-{ uint128_t x32 = (x31 >> 0x3a);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffffff);
-{ uint128_t x34 = (x32 + x18);
-{ uint128_t x35 = (x34 >> 0x3b);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffffff);
-{ uint128_t x37 = (x35 + x17);
-{ uint128_t x38 = (x37 >> 0x3a);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3ffffffffffffff);
-{ uint128_t x40 = (x38 + x16);
-{ uint128_t x41 = (x40 >> 0x3b);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
-{ uint128_t x43 = (x41 + x15);
-{ uint128_t x44 = (x43 >> 0x3a);
-{ uint64_t x45 = ((uint64_t)x43 & 0x3ffffffffffffff);
-{ uint128_t x46 = (x24 + (0x11 * x44));
-{ uint64_t x47 = (uint64_t) (x46 >> 0x3b);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffffff);
-{ uint64_t x49 = (x47 + x27);
-{ uint64_t x50 = (x49 >> 0x3a);
-{ uint64_t x51 = (x49 & 0x3ffffffffffffff);
-out[0] = x45;
-out[1] = x42;
-out[2] = x39;
-out[3] = x36;
-out[4] = x33;
-out[5] = x50 + x30;
-out[6] = x51;
-out[7] = x48;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2))))))));
+ { uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (0x2 * ((uint128_t)x13 * x13))));
+ { uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+ { uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
+ { uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
+ { uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+ { uint128_t x22 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
+ { uint128_t x23 = (x22 >> 0x3b);
+ { uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffffff);
+ { uint128_t x25 = (x23 + x21);
+ { uint128_t x26 = (x25 >> 0x3a);
+ { uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
+ { uint128_t x28 = (x26 + x20);
+ { uint128_t x29 = (x28 >> 0x3b);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
+ { uint128_t x31 = (x29 + x19);
+ { uint128_t x32 = (x31 >> 0x3a);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffffff);
+ { uint128_t x34 = (x32 + x18);
+ { uint128_t x35 = (x34 >> 0x3b);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffffff);
+ { uint128_t x37 = (x35 + x17);
+ { uint128_t x38 = (x37 >> 0x3a);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3ffffffffffffff);
+ { uint128_t x40 = (x38 + x16);
+ { uint128_t x41 = (x40 >> 0x3b);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
+ { uint128_t x43 = (x41 + x15);
+ { uint128_t x44 = (x43 >> 0x3a);
+ { uint64_t x45 = ((uint64_t)x43 & 0x3ffffffffffffff);
+ { uint128_t x46 = (x24 + (0x11 * x44));
+ { uint64_t x47 = (uint64_t) (x46 >> 0x3b);
+ { uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffffff);
+ { uint64_t x49 = (x47 + x27);
+ { uint64_t x50 = (x49 >> 0x3a);
+ { uint64_t x51 = (x49 & 0x3ffffffffffffff);
+ out[0] = x48;
+ out[1] = x51;
+ out[2] = (x50 + x30);
+ out[3] = x33;
+ out[4] = x36;
+ out[5] = x39;
+ out[6] = x42;
+ out[7] = x45;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e468m17/freeze.c b/src/Specific/solinas64_2e468m17/freeze.c
index 8310bd145..842d0a1cf 100644
--- a/src/Specific/solinas64_2e468m17/freeze.c
+++ b/src/Specific/solinas64_2e468m17/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 59 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7ffffffffffffef;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7ffffffffffffef);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0x3ffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0x7ffffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0x3ffffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0x7ffffffffffffff);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0x3ffffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0x7ffffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0x3ffffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0x7ffffffffffffef);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0x3ffffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0x7ffffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0x3ffffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0x7ffffffffffffff);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0x3ffffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0x7ffffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 59 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0x3ffffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 58 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e480m2e240m1/femul.c b/src/Specific/solinas64_2e480m2e240m1/femul.c
index af01cc5bd..f76985cfd 100644
--- a/src/Specific/solinas64_2e480m2e240m1/femul.c
+++ b/src/Specific/solinas64_2e480m2e240m1/femul.c
@@ -1,83 +1,81 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
-{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
-{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
-{ uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
-{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
-{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
-{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
-{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
-{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
-{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
-{ uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
-{ uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
-{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
-{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
-{ uint128_t x46 = (x42 >> 0x3c);
-{ uint64_t x47 = ((uint64_t)x42 & 0xfffffffffffffff);
-{ uint128_t x48 = (x35 >> 0x3c);
-{ uint64_t x49 = ((uint64_t)x35 & 0xfffffffffffffff);
-{ uint128_t x50 = ((0x1000000000000000 * x48) + x49);
-{ uint128_t x51 = (x50 >> 0x3c);
-{ uint64_t x52 = ((uint64_t)x50 & 0xfffffffffffffff);
-{ uint128_t x53 = ((x46 + x41) + x51);
-{ uint128_t x54 = (x53 >> 0x3c);
-{ uint64_t x55 = ((uint64_t)x53 & 0xfffffffffffffff);
-{ uint128_t x56 = (x45 + x51);
-{ uint128_t x57 = (x56 >> 0x3c);
-{ uint64_t x58 = ((uint64_t)x56 & 0xfffffffffffffff);
-{ uint128_t x59 = (x54 + x40);
-{ uint128_t x60 = (x59 >> 0x3c);
-{ uint64_t x61 = ((uint64_t)x59 & 0xfffffffffffffff);
-{ uint128_t x62 = (x57 + x44);
-{ uint128_t x63 = (x62 >> 0x3c);
-{ uint64_t x64 = ((uint64_t)x62 & 0xfffffffffffffff);
-{ uint128_t x65 = (x60 + x39);
-{ uint128_t x66 = (x65 >> 0x3c);
-{ uint64_t x67 = ((uint64_t)x65 & 0xfffffffffffffff);
-{ uint128_t x68 = (x63 + x43);
-{ uint128_t x69 = (x68 >> 0x3c);
-{ uint64_t x70 = ((uint64_t)x68 & 0xfffffffffffffff);
-{ uint128_t x71 = (x66 + x52);
-{ uint64_t x72 = (uint64_t) (x71 >> 0x3c);
-{ uint64_t x73 = ((uint64_t)x71 & 0xfffffffffffffff);
-{ uint128_t x74 = (x69 + x47);
-{ uint64_t x75 = (uint64_t) (x74 >> 0x3c);
-{ uint64_t x76 = ((uint64_t)x74 & 0xfffffffffffffff);
-{ uint128_t x77 = (((uint128_t)0x1000000000000000 * x72) + x73);
-{ uint64_t x78 = (uint64_t) (x77 >> 0x3c);
-{ uint64_t x79 = ((uint64_t)x77 & 0xfffffffffffffff);
-{ uint64_t x80 = ((x75 + x55) + x78);
-{ uint64_t x81 = (x80 >> 0x3c);
-{ uint64_t x82 = (x80 & 0xfffffffffffffff);
-{ uint64_t x83 = (x58 + x78);
-{ uint64_t x84 = (x83 >> 0x3c);
-{ uint64_t x85 = (x83 & 0xfffffffffffffff);
-out[0] = x79;
-out[1] = x67;
-out[2] = x81 + x61;
-out[3] = x82;
-out[4] = x76;
-out[5] = x70;
-out[6] = x84 + x64;
-out[7] = x85;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void femul(uint64_t out[8], const uint64_t in1[8], const uint64_t in2[8]) {
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x30 = in2[7];
+ { const uint64_t x31 = in2[6];
+ { const uint64_t x29 = in2[5];
+ { const uint64_t x27 = in2[4];
+ { const uint64_t x25 = in2[3];
+ { const uint64_t x23 = in2[2];
+ { const uint64_t x21 = in2[1];
+ { const uint64_t x19 = in2[0];
+ { uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25));
+ { uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+ { uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
+ { uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
+ { uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
+ { uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+ { uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+ { uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32);
+ { uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+ { uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
+ { uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
+ { uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+ { uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+ { uint128_t x46 = (x42 >> 0x3c);
+ { uint64_t x47 = ((uint64_t)x42 & 0xfffffffffffffff);
+ { uint128_t x48 = (x35 >> 0x3c);
+ { uint64_t x49 = ((uint64_t)x35 & 0xfffffffffffffff);
+ { uint128_t x50 = ((0x1000000000000000 * x48) + x49);
+ { uint128_t x51 = (x50 >> 0x3c);
+ { uint64_t x52 = ((uint64_t)x50 & 0xfffffffffffffff);
+ { uint128_t x53 = ((x46 + x41) + x51);
+ { uint128_t x54 = (x53 >> 0x3c);
+ { uint64_t x55 = ((uint64_t)x53 & 0xfffffffffffffff);
+ { uint128_t x56 = (x45 + x51);
+ { uint128_t x57 = (x56 >> 0x3c);
+ { uint64_t x58 = ((uint64_t)x56 & 0xfffffffffffffff);
+ { uint128_t x59 = (x54 + x40);
+ { uint128_t x60 = (x59 >> 0x3c);
+ { uint64_t x61 = ((uint64_t)x59 & 0xfffffffffffffff);
+ { uint128_t x62 = (x57 + x44);
+ { uint128_t x63 = (x62 >> 0x3c);
+ { uint64_t x64 = ((uint64_t)x62 & 0xfffffffffffffff);
+ { uint128_t x65 = (x60 + x39);
+ { uint128_t x66 = (x65 >> 0x3c);
+ { uint64_t x67 = ((uint64_t)x65 & 0xfffffffffffffff);
+ { uint128_t x68 = (x63 + x43);
+ { uint128_t x69 = (x68 >> 0x3c);
+ { uint64_t x70 = ((uint64_t)x68 & 0xfffffffffffffff);
+ { uint128_t x71 = (x66 + x52);
+ { uint64_t x72 = (uint64_t) (x71 >> 0x3c);
+ { uint64_t x73 = ((uint64_t)x71 & 0xfffffffffffffff);
+ { uint128_t x74 = (x69 + x47);
+ { uint64_t x75 = (uint64_t) (x74 >> 0x3c);
+ { uint64_t x76 = ((uint64_t)x74 & 0xfffffffffffffff);
+ { uint128_t x77 = (((uint128_t)0x1000000000000000 * x72) + x73);
+ { uint64_t x78 = (uint64_t) (x77 >> 0x3c);
+ { uint64_t x79 = ((uint64_t)x77 & 0xfffffffffffffff);
+ { uint64_t x80 = ((x75 + x55) + x78);
+ { uint64_t x81 = (x80 >> 0x3c);
+ { uint64_t x82 = (x80 & 0xfffffffffffffff);
+ { uint64_t x83 = (x58 + x78);
+ { uint64_t x84 = (x83 >> 0x3c);
+ { uint64_t x85 = (x83 & 0xfffffffffffffff);
+ out[0] = x85;
+ out[1] = (x84 + x64);
+ out[2] = x70;
+ out[3] = x76;
+ out[4] = x82;
+ out[5] = (x81 + x61);
+ out[6] = x67;
+ out[7] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e480m2e240m1/fesquare.c b/src/Specific/solinas64_2e480m2e240m1/fesquare.c
index feaea291a..83913c4a7 100644
--- a/src/Specific/solinas64_2e480m2e240m1/fesquare.c
+++ b/src/Specific/solinas64_2e480m2e240m1/fesquare.c
@@ -1,83 +1,73 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
-{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
-{ uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
-{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
-{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
-{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
-{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
-{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
-{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
-{ uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
-{ uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
-{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
-{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
-{ uint128_t x29 = (x25 >> 0x3c);
-{ uint64_t x30 = ((uint64_t)x25 & 0xfffffffffffffff);
-{ uint128_t x31 = (x18 >> 0x3c);
-{ uint64_t x32 = ((uint64_t)x18 & 0xfffffffffffffff);
-{ uint128_t x33 = ((0x1000000000000000 * x31) + x32);
-{ uint128_t x34 = (x33 >> 0x3c);
-{ uint64_t x35 = ((uint64_t)x33 & 0xfffffffffffffff);
-{ uint128_t x36 = ((x29 + x24) + x34);
-{ uint128_t x37 = (x36 >> 0x3c);
-{ uint64_t x38 = ((uint64_t)x36 & 0xfffffffffffffff);
-{ uint128_t x39 = (x28 + x34);
-{ uint128_t x40 = (x39 >> 0x3c);
-{ uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffffff);
-{ uint128_t x42 = (x37 + x23);
-{ uint128_t x43 = (x42 >> 0x3c);
-{ uint64_t x44 = ((uint64_t)x42 & 0xfffffffffffffff);
-{ uint128_t x45 = (x40 + x27);
-{ uint128_t x46 = (x45 >> 0x3c);
-{ uint64_t x47 = ((uint64_t)x45 & 0xfffffffffffffff);
-{ uint128_t x48 = (x43 + x22);
-{ uint128_t x49 = (x48 >> 0x3c);
-{ uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffffff);
-{ uint128_t x51 = (x46 + x26);
-{ uint128_t x52 = (x51 >> 0x3c);
-{ uint64_t x53 = ((uint64_t)x51 & 0xfffffffffffffff);
-{ uint128_t x54 = (x49 + x35);
-{ uint64_t x55 = (uint64_t) (x54 >> 0x3c);
-{ uint64_t x56 = ((uint64_t)x54 & 0xfffffffffffffff);
-{ uint128_t x57 = (x52 + x30);
-{ uint64_t x58 = (uint64_t) (x57 >> 0x3c);
-{ uint64_t x59 = ((uint64_t)x57 & 0xfffffffffffffff);
-{ uint128_t x60 = (((uint128_t)0x1000000000000000 * x55) + x56);
-{ uint64_t x61 = (uint64_t) (x60 >> 0x3c);
-{ uint64_t x62 = ((uint64_t)x60 & 0xfffffffffffffff);
-{ uint64_t x63 = ((x58 + x38) + x61);
-{ uint64_t x64 = (x63 >> 0x3c);
-{ uint64_t x65 = (x63 & 0xfffffffffffffff);
-{ uint64_t x66 = (x41 + x61);
-{ uint64_t x67 = (x66 >> 0x3c);
-{ uint64_t x68 = (x66 & 0xfffffffffffffff);
-out[0] = x62;
-out[1] = x50;
-out[2] = x64 + x44;
-out[3] = x65;
-out[4] = x59;
-out[5] = x53;
-out[6] = x67 + x47;
-out[7] = x68;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[8];
+static void fesquare(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8));
+ { uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+ { uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
+ { uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
+ { uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
+ { uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+ { uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+ { uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15);
+ { uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+ { uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
+ { uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
+ { uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+ { uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+ { uint128_t x29 = (x25 >> 0x3c);
+ { uint64_t x30 = ((uint64_t)x25 & 0xfffffffffffffff);
+ { uint128_t x31 = (x18 >> 0x3c);
+ { uint64_t x32 = ((uint64_t)x18 & 0xfffffffffffffff);
+ { uint128_t x33 = ((0x1000000000000000 * x31) + x32);
+ { uint128_t x34 = (x33 >> 0x3c);
+ { uint64_t x35 = ((uint64_t)x33 & 0xfffffffffffffff);
+ { uint128_t x36 = ((x29 + x24) + x34);
+ { uint128_t x37 = (x36 >> 0x3c);
+ { uint64_t x38 = ((uint64_t)x36 & 0xfffffffffffffff);
+ { uint128_t x39 = (x28 + x34);
+ { uint128_t x40 = (x39 >> 0x3c);
+ { uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffffff);
+ { uint128_t x42 = (x37 + x23);
+ { uint128_t x43 = (x42 >> 0x3c);
+ { uint64_t x44 = ((uint64_t)x42 & 0xfffffffffffffff);
+ { uint128_t x45 = (x40 + x27);
+ { uint128_t x46 = (x45 >> 0x3c);
+ { uint64_t x47 = ((uint64_t)x45 & 0xfffffffffffffff);
+ { uint128_t x48 = (x43 + x22);
+ { uint128_t x49 = (x48 >> 0x3c);
+ { uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffffff);
+ { uint128_t x51 = (x46 + x26);
+ { uint128_t x52 = (x51 >> 0x3c);
+ { uint64_t x53 = ((uint64_t)x51 & 0xfffffffffffffff);
+ { uint128_t x54 = (x49 + x35);
+ { uint64_t x55 = (uint64_t) (x54 >> 0x3c);
+ { uint64_t x56 = ((uint64_t)x54 & 0xfffffffffffffff);
+ { uint128_t x57 = (x52 + x30);
+ { uint64_t x58 = (uint64_t) (x57 >> 0x3c);
+ { uint64_t x59 = ((uint64_t)x57 & 0xfffffffffffffff);
+ { uint128_t x60 = (((uint128_t)0x1000000000000000 * x55) + x56);
+ { uint64_t x61 = (uint64_t) (x60 >> 0x3c);
+ { uint64_t x62 = ((uint64_t)x60 & 0xfffffffffffffff);
+ { uint64_t x63 = ((x58 + x38) + x61);
+ { uint64_t x64 = (x63 >> 0x3c);
+ { uint64_t x65 = (x63 & 0xfffffffffffffff);
+ { uint64_t x66 = (x41 + x61);
+ { uint64_t x67 = (x66 >> 0x3c);
+ { uint64_t x68 = (x66 & 0xfffffffffffffff);
+ out[0] = x68;
+ out[1] = (x67 + x47);
+ out[2] = x53;
+ out[3] = x59;
+ out[4] = x65;
+ out[5] = (x64 + x44);
+ out[6] = x50;
+ out[7] = x62;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e480m2e240m1/freeze.c b/src/Specific/solinas64_2e480m2e240m1/freeze.c
index 9d5366fac..adedd4cd2 100644
--- a/src/Specific/solinas64_2e480m2e240m1/freeze.c
+++ b/src/Specific/solinas64_2e480m2e240m1/freeze.c
@@ -1,25 +1,44 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x16;
-out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 60 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffffff;;
+static void freeze(uint64_t out[8], const uint64_t in1[8]) {
+ { const uint64_t x13 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x16, uint8_t x17 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffffff);
+ { uint64_t x19, uint8_t x20 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x17, Return x4, 0xfffffffffffffff);
+ { uint64_t x22, uint8_t x23 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x20, Return x6, 0xfffffffffffffff);
+ { uint64_t x25, uint8_t x26 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x23, Return x8, 0xfffffffffffffff);
+ { uint64_t x28, uint8_t x29 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x26, Return x10, 0xffffffffffffffe);
+ { uint64_t x31, uint8_t x32 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x29, Return x12, 0xfffffffffffffff);
+ { uint64_t x34, uint8_t x35 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x32, Return x14, 0xfffffffffffffff);
+ { uint64_t x37, uint8_t x38 = Op (Syntax.SubWithGetBorrow 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x35, Return x13, 0xfffffffffffffff);
+ { uint64_t x39 = (uint64_t)cmovznz(x38, 0x0, 0xffffffffffffffffL);
+ { uint64_t x40 = (x39 & 0xfffffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x16, Return x40);
+ { uint64_t x44 = (x39 & 0xfffffffffffffff);
+ { uint64_t x46, uint8_t x47 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x43, Return x19, Return x44);
+ { uint64_t x48 = (x39 & 0xfffffffffffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x47, Return x22, Return x48);
+ { uint64_t x52 = (x39 & 0xfffffffffffffff);
+ { uint64_t x54, uint8_t x55 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x25, Return x52);
+ { uint64_t x56 = (x39 & 0xffffffffffffffe);
+ { uint64_t x58, uint8_t x59 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x55, Return x28, Return x56);
+ { uint64_t x60 = (x39 & 0xfffffffffffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x59, Return x31, Return x60);
+ { uint64_t x64 = (x39 & 0xfffffffffffffff);
+ { uint64_t x66, uint8_t x67 = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x34, Return x64);
+ { uint64_t x68 = (x39 & 0xfffffffffffffff);
+ { uint64_t x70, uint8_t _ = Op (Syntax.AddWithGetCarry 60 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x67, Return x37, Return x68);
+ out[0] = x42;
+ out[1] = x46;
+ out[2] = x50;
+ out[3] = x54;
+ out[4] = x58;
+ out[5] = x62;
+ out[6] = x66;
+ out[7] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e488m17/femul.c b/src/Specific/solinas64_2e488m17/femul.c
index 155b528f3..9f28e5742 100644
--- a/src/Specific/solinas64_2e488m17/femul.c
+++ b/src/Specific/solinas64_2e488m17/femul.c
@@ -1,106 +1,120 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
-{ uint128_t x64 = (((uint128_t)x5 * x62) + (((uint128_t)x7 * x63) + (((uint128_t)x9 * x61) + (((uint128_t)x11 * x59) + (((uint128_t)x13 * x57) + (((uint128_t)x15 * x55) + (((uint128_t)x17 * x53) + (((uint128_t)x19 * x51) + (((uint128_t)x21 * x49) + (((uint128_t)x23 * x47) + (((uint128_t)x25 * x45) + (((uint128_t)x27 * x43) + (((uint128_t)x29 * x41) + (((uint128_t)x31 * x39) + (((uint128_t)x33 * x37) + ((uint128_t)x32 * x35))))))))))))))));
-{ uint128_t x65 = ((((uint128_t)x5 * x63) + (((uint128_t)0x2 * (x7 * x61)) + (((uint128_t)x9 * x59) + (((uint128_t)0x2 * (x11 * x57)) + (((uint128_t)x13 * x55) + (((uint128_t)0x2 * (x15 * x53)) + (((uint128_t)x17 * x51) + (((uint128_t)0x2 * (x19 * x49)) + (((uint128_t)x21 * x47) + (((uint128_t)0x2 * (x23 * x45)) + (((uint128_t)x25 * x43) + (((uint128_t)0x2 * (x27 * x41)) + (((uint128_t)x29 * x39) + (((uint128_t)0x2 * (x31 * x37)) + ((uint128_t)x33 * x35))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x32 * x62))));
-{ uint128_t x66 = ((((uint128_t)x5 * x61) + (((uint128_t)x7 * x59) + (((uint128_t)x9 * x57) + (((uint128_t)x11 * x55) + (((uint128_t)x13 * x53) + (((uint128_t)x15 * x51) + (((uint128_t)x17 * x49) + (((uint128_t)x19 * x47) + (((uint128_t)x21 * x45) + (((uint128_t)x23 * x43) + (((uint128_t)x25 * x41) + (((uint128_t)x27 * x39) + (((uint128_t)x29 * x37) + ((uint128_t)x31 * x35)))))))))))))) + (0x11 * (((uint128_t)x33 * x62) + ((uint128_t)x32 * x63))));
-{ uint128_t x67 = ((((uint128_t)x5 * x59) + (((uint128_t)0x2 * (x7 * x57)) + (((uint128_t)x9 * x55) + (((uint128_t)0x2 * (x11 * x53)) + (((uint128_t)x13 * x51) + (((uint128_t)0x2 * (x15 * x49)) + (((uint128_t)x17 * x47) + (((uint128_t)0x2 * (x19 * x45)) + (((uint128_t)x21 * x43) + (((uint128_t)0x2 * (x23 * x41)) + (((uint128_t)x25 * x39) + (((uint128_t)0x2 * (x27 * x37)) + ((uint128_t)x29 * x35))))))))))))) + (0x11 * (((uint128_t)0x2 * (x31 * x62)) + (((uint128_t)x33 * x63) + ((uint128_t)0x2 * (x32 * x61))))));
-{ uint128_t x68 = ((((uint128_t)x5 * x57) + (((uint128_t)x7 * x55) + (((uint128_t)x9 * x53) + (((uint128_t)x11 * x51) + (((uint128_t)x13 * x49) + (((uint128_t)x15 * x47) + (((uint128_t)x17 * x45) + (((uint128_t)x19 * x43) + (((uint128_t)x21 * x41) + (((uint128_t)x23 * x39) + (((uint128_t)x25 * x37) + ((uint128_t)x27 * x35)))))))))))) + (0x11 * (((uint128_t)x29 * x62) + (((uint128_t)x31 * x63) + (((uint128_t)x33 * x61) + ((uint128_t)x32 * x59))))));
-{ uint128_t x69 = ((((uint128_t)x5 * x55) + (((uint128_t)0x2 * (x7 * x53)) + (((uint128_t)x9 * x51) + (((uint128_t)0x2 * (x11 * x49)) + (((uint128_t)x13 * x47) + (((uint128_t)0x2 * (x15 * x45)) + (((uint128_t)x17 * x43) + (((uint128_t)0x2 * (x19 * x41)) + (((uint128_t)x21 * x39) + (((uint128_t)0x2 * (x23 * x37)) + ((uint128_t)x25 * x35))))))))))) + (0x11 * (((uint128_t)0x2 * (x27 * x62)) + (((uint128_t)x29 * x63) + (((uint128_t)0x2 * (x31 * x61)) + (((uint128_t)x33 * x59) + ((uint128_t)0x2 * (x32 * x57))))))));
-{ uint128_t x70 = ((((uint128_t)x5 * x53) + (((uint128_t)x7 * x51) + (((uint128_t)x9 * x49) + (((uint128_t)x11 * x47) + (((uint128_t)x13 * x45) + (((uint128_t)x15 * x43) + (((uint128_t)x17 * x41) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x23 * x35)))))))))) + (0x11 * (((uint128_t)x25 * x62) + (((uint128_t)x27 * x63) + (((uint128_t)x29 * x61) + (((uint128_t)x31 * x59) + (((uint128_t)x33 * x57) + ((uint128_t)x32 * x55))))))));
-{ uint128_t x71 = ((((uint128_t)x5 * x51) + (((uint128_t)0x2 * (x7 * x49)) + (((uint128_t)x9 * x47) + (((uint128_t)0x2 * (x11 * x45)) + (((uint128_t)x13 * x43) + (((uint128_t)0x2 * (x15 * x41)) + (((uint128_t)x17 * x39) + (((uint128_t)0x2 * (x19 * x37)) + ((uint128_t)x21 * x35))))))))) + (0x11 * (((uint128_t)0x2 * (x23 * x62)) + (((uint128_t)x25 * x63) + (((uint128_t)0x2 * (x27 * x61)) + (((uint128_t)x29 * x59) + (((uint128_t)0x2 * (x31 * x57)) + (((uint128_t)x33 * x55) + ((uint128_t)0x2 * (x32 * x53))))))))));
-{ uint128_t x72 = ((((uint128_t)x5 * x49) + (((uint128_t)x7 * x47) + (((uint128_t)x9 * x45) + (((uint128_t)x11 * x43) + (((uint128_t)x13 * x41) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + ((uint128_t)x19 * x35)))))))) + (0x11 * (((uint128_t)x21 * x62) + (((uint128_t)x23 * x63) + (((uint128_t)x25 * x61) + (((uint128_t)x27 * x59) + (((uint128_t)x29 * x57) + (((uint128_t)x31 * x55) + (((uint128_t)x33 * x53) + ((uint128_t)x32 * x51))))))))));
-{ uint128_t x73 = ((((uint128_t)x5 * x47) + (((uint128_t)0x2 * (x7 * x45)) + (((uint128_t)x9 * x43) + (((uint128_t)0x2 * (x11 * x41)) + (((uint128_t)x13 * x39) + (((uint128_t)0x2 * (x15 * x37)) + ((uint128_t)x17 * x35))))))) + (0x11 * (((uint128_t)0x2 * (x19 * x62)) + (((uint128_t)x21 * x63) + (((uint128_t)0x2 * (x23 * x61)) + (((uint128_t)x25 * x59) + (((uint128_t)0x2 * (x27 * x57)) + (((uint128_t)x29 * x55) + (((uint128_t)0x2 * (x31 * x53)) + (((uint128_t)x33 * x51) + ((uint128_t)0x2 * (x32 * x49))))))))))));
-{ uint128_t x74 = ((((uint128_t)x5 * x45) + (((uint128_t)x7 * x43) + (((uint128_t)x9 * x41) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + ((uint128_t)x15 * x35)))))) + (0x11 * (((uint128_t)x17 * x62) + (((uint128_t)x19 * x63) + (((uint128_t)x21 * x61) + (((uint128_t)x23 * x59) + (((uint128_t)x25 * x57) + (((uint128_t)x27 * x55) + (((uint128_t)x29 * x53) + (((uint128_t)x31 * x51) + (((uint128_t)x33 * x49) + ((uint128_t)x32 * x47))))))))))));
-{ uint128_t x75 = ((((uint128_t)x5 * x43) + (((uint128_t)0x2 * (x7 * x41)) + (((uint128_t)x9 * x39) + (((uint128_t)0x2 * (x11 * x37)) + ((uint128_t)x13 * x35))))) + (0x11 * (((uint128_t)0x2 * (x15 * x62)) + (((uint128_t)x17 * x63) + (((uint128_t)0x2 * (x19 * x61)) + (((uint128_t)x21 * x59) + (((uint128_t)0x2 * (x23 * x57)) + (((uint128_t)x25 * x55) + (((uint128_t)0x2 * (x27 * x53)) + (((uint128_t)x29 * x51) + (((uint128_t)0x2 * (x31 * x49)) + (((uint128_t)x33 * x47) + ((uint128_t)0x2 * (x32 * x45))))))))))))));
-{ uint128_t x76 = ((((uint128_t)x5 * x41) + (((uint128_t)x7 * x39) + (((uint128_t)x9 * x37) + ((uint128_t)x11 * x35)))) + (0x11 * (((uint128_t)x13 * x62) + (((uint128_t)x15 * x63) + (((uint128_t)x17 * x61) + (((uint128_t)x19 * x59) + (((uint128_t)x21 * x57) + (((uint128_t)x23 * x55) + (((uint128_t)x25 * x53) + (((uint128_t)x27 * x51) + (((uint128_t)x29 * x49) + (((uint128_t)x31 * x47) + (((uint128_t)x33 * x45) + ((uint128_t)x32 * x43))))))))))))));
-{ uint128_t x77 = ((((uint128_t)x5 * x39) + (((uint128_t)0x2 * (x7 * x37)) + ((uint128_t)x9 * x35))) + (0x11 * (((uint128_t)0x2 * (x11 * x62)) + (((uint128_t)x13 * x63) + (((uint128_t)0x2 * (x15 * x61)) + (((uint128_t)x17 * x59) + (((uint128_t)0x2 * (x19 * x57)) + (((uint128_t)x21 * x55) + (((uint128_t)0x2 * (x23 * x53)) + (((uint128_t)x25 * x51) + (((uint128_t)0x2 * (x27 * x49)) + (((uint128_t)x29 * x47) + (((uint128_t)0x2 * (x31 * x45)) + (((uint128_t)x33 * x43) + ((uint128_t)0x2 * (x32 * x41))))))))))))))));
-{ uint128_t x78 = ((((uint128_t)x5 * x37) + ((uint128_t)x7 * x35)) + (0x11 * (((uint128_t)x9 * x62) + (((uint128_t)x11 * x63) + (((uint128_t)x13 * x61) + (((uint128_t)x15 * x59) + (((uint128_t)x17 * x57) + (((uint128_t)x19 * x55) + (((uint128_t)x21 * x53) + (((uint128_t)x23 * x51) + (((uint128_t)x25 * x49) + (((uint128_t)x27 * x47) + (((uint128_t)x29 * x45) + (((uint128_t)x31 * x43) + (((uint128_t)x33 * x41) + ((uint128_t)x32 * x39))))))))))))))));
-{ uint128_t x79 = (((uint128_t)x5 * x35) + (0x11 * (((uint128_t)0x2 * (x7 * x62)) + (((uint128_t)x9 * x63) + (((uint128_t)0x2 * (x11 * x61)) + (((uint128_t)x13 * x59) + (((uint128_t)0x2 * (x15 * x57)) + (((uint128_t)x17 * x55) + (((uint128_t)0x2 * (x19 * x53)) + (((uint128_t)x21 * x51) + (((uint128_t)0x2 * (x23 * x49)) + (((uint128_t)x25 * x47) + (((uint128_t)0x2 * (x27 * x45)) + (((uint128_t)x29 * x43) + (((uint128_t)0x2 * (x31 * x41)) + (((uint128_t)x33 * x39) + ((uint128_t)0x2 * (x32 * x37))))))))))))))))));
-{ uint64_t x80 = (uint64_t) (x79 >> 0x1f);
-{ uint64_t x81 = ((uint64_t)x79 & 0x7fffffff);
-{ uint128_t x82 = (x80 + x78);
-{ uint64_t x83 = (uint64_t) (x82 >> 0x1e);
-{ uint64_t x84 = ((uint64_t)x82 & 0x3fffffff);
-{ uint128_t x85 = (x83 + x77);
-{ uint64_t x86 = (uint64_t) (x85 >> 0x1f);
-{ uint64_t x87 = ((uint64_t)x85 & 0x7fffffff);
-{ uint128_t x88 = (x86 + x76);
-{ uint64_t x89 = (uint64_t) (x88 >> 0x1e);
-{ uint64_t x90 = ((uint64_t)x88 & 0x3fffffff);
-{ uint128_t x91 = (x89 + x75);
-{ uint64_t x92 = (uint64_t) (x91 >> 0x1f);
-{ uint64_t x93 = ((uint64_t)x91 & 0x7fffffff);
-{ uint128_t x94 = (x92 + x74);
-{ uint64_t x95 = (uint64_t) (x94 >> 0x1e);
-{ uint64_t x96 = ((uint64_t)x94 & 0x3fffffff);
-{ uint128_t x97 = (x95 + x73);
-{ uint64_t x98 = (uint64_t) (x97 >> 0x1f);
-{ uint64_t x99 = ((uint64_t)x97 & 0x7fffffff);
-{ uint128_t x100 = (x98 + x72);
-{ uint64_t x101 = (uint64_t) (x100 >> 0x1e);
-{ uint64_t x102 = ((uint64_t)x100 & 0x3fffffff);
-{ uint128_t x103 = (x101 + x71);
-{ uint64_t x104 = (uint64_t) (x103 >> 0x1f);
-{ uint64_t x105 = ((uint64_t)x103 & 0x7fffffff);
-{ uint128_t x106 = (x104 + x70);
-{ uint64_t x107 = (uint64_t) (x106 >> 0x1e);
-{ uint64_t x108 = ((uint64_t)x106 & 0x3fffffff);
-{ uint128_t x109 = (x107 + x69);
-{ uint64_t x110 = (uint64_t) (x109 >> 0x1f);
-{ uint64_t x111 = ((uint64_t)x109 & 0x7fffffff);
-{ uint128_t x112 = (x110 + x68);
-{ uint64_t x113 = (uint64_t) (x112 >> 0x1e);
-{ uint64_t x114 = ((uint64_t)x112 & 0x3fffffff);
-{ uint128_t x115 = (x113 + x67);
-{ uint64_t x116 = (uint64_t) (x115 >> 0x1f);
-{ uint64_t x117 = ((uint64_t)x115 & 0x7fffffff);
-{ uint128_t x118 = (x116 + x66);
-{ uint64_t x119 = (uint64_t) (x118 >> 0x1e);
-{ uint64_t x120 = ((uint64_t)x118 & 0x3fffffff);
-{ uint128_t x121 = (x119 + x65);
-{ uint64_t x122 = (uint64_t) (x121 >> 0x1f);
-{ uint64_t x123 = ((uint64_t)x121 & 0x7fffffff);
-{ uint128_t x124 = (x122 + x64);
-{ uint64_t x125 = (uint64_t) (x124 >> 0x1e);
-{ uint64_t x126 = ((uint64_t)x124 & 0x3fffffff);
-{ uint64_t x127 = (x81 + (0x11 * x125));
-{ uint64_t x128 = (x127 >> 0x1f);
-{ uint64_t x129 = (x127 & 0x7fffffff);
-{ uint64_t x130 = (x128 + x84);
-{ uint64_t x131 = (x130 >> 0x1e);
-{ uint64_t x132 = (x130 & 0x3fffffff);
-out[0] = x126;
-out[1] = x123;
-out[2] = x120;
-out[3] = x117;
-out[4] = x114;
-out[5] = x111;
-out[6] = x108;
-out[7] = x105;
-out[8] = x102;
-out[9] = x99;
-out[10] = x96;
-out[11] = x93;
-out[12] = x90;
-out[13] = x131 + x87;
-out[14] = x132;
-out[15] = x129;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void femul(uint64_t out[16], const uint64_t in1[16], const uint64_t in2[16]) {
+ { const uint64_t x32 = in1[15];
+ { const uint64_t x33 = in1[14];
+ { const uint64_t x31 = in1[13];
+ { const uint64_t x29 = in1[12];
+ { const uint64_t x27 = in1[11];
+ { const uint64_t x25 = in1[10];
+ { const uint64_t x23 = in1[9];
+ { const uint64_t x21 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x62 = in2[15];
+ { const uint64_t x63 = in2[14];
+ { const uint64_t x61 = in2[13];
+ { const uint64_t x59 = in2[12];
+ { const uint64_t x57 = in2[11];
+ { const uint64_t x55 = in2[10];
+ { const uint64_t x53 = in2[9];
+ { const uint64_t x51 = in2[8];
+ { const uint64_t x49 = in2[7];
+ { const uint64_t x47 = in2[6];
+ { const uint64_t x45 = in2[5];
+ { const uint64_t x43 = in2[4];
+ { const uint64_t x41 = in2[3];
+ { const uint64_t x39 = in2[2];
+ { const uint64_t x37 = in2[1];
+ { const uint64_t x35 = in2[0];
+ { uint128_t x64 = (((uint128_t)x5 * x62) + (((uint128_t)x7 * x63) + (((uint128_t)x9 * x61) + (((uint128_t)x11 * x59) + (((uint128_t)x13 * x57) + (((uint128_t)x15 * x55) + (((uint128_t)x17 * x53) + (((uint128_t)x19 * x51) + (((uint128_t)x21 * x49) + (((uint128_t)x23 * x47) + (((uint128_t)x25 * x45) + (((uint128_t)x27 * x43) + (((uint128_t)x29 * x41) + (((uint128_t)x31 * x39) + (((uint128_t)x33 * x37) + ((uint128_t)x32 * x35))))))))))))))));
+ { uint128_t x65 = ((((uint128_t)x5 * x63) + (((uint128_t)0x2 * (x7 * x61)) + (((uint128_t)x9 * x59) + (((uint128_t)0x2 * (x11 * x57)) + (((uint128_t)x13 * x55) + (((uint128_t)0x2 * (x15 * x53)) + (((uint128_t)x17 * x51) + (((uint128_t)0x2 * (x19 * x49)) + (((uint128_t)x21 * x47) + (((uint128_t)0x2 * (x23 * x45)) + (((uint128_t)x25 * x43) + (((uint128_t)0x2 * (x27 * x41)) + (((uint128_t)x29 * x39) + (((uint128_t)0x2 * (x31 * x37)) + ((uint128_t)x33 * x35))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x32 * x62))));
+ { uint128_t x66 = ((((uint128_t)x5 * x61) + (((uint128_t)x7 * x59) + (((uint128_t)x9 * x57) + (((uint128_t)x11 * x55) + (((uint128_t)x13 * x53) + (((uint128_t)x15 * x51) + (((uint128_t)x17 * x49) + (((uint128_t)x19 * x47) + (((uint128_t)x21 * x45) + (((uint128_t)x23 * x43) + (((uint128_t)x25 * x41) + (((uint128_t)x27 * x39) + (((uint128_t)x29 * x37) + ((uint128_t)x31 * x35)))))))))))))) + (0x11 * (((uint128_t)x33 * x62) + ((uint128_t)x32 * x63))));
+ { uint128_t x67 = ((((uint128_t)x5 * x59) + (((uint128_t)0x2 * (x7 * x57)) + (((uint128_t)x9 * x55) + (((uint128_t)0x2 * (x11 * x53)) + (((uint128_t)x13 * x51) + (((uint128_t)0x2 * (x15 * x49)) + (((uint128_t)x17 * x47) + (((uint128_t)0x2 * (x19 * x45)) + (((uint128_t)x21 * x43) + (((uint128_t)0x2 * (x23 * x41)) + (((uint128_t)x25 * x39) + (((uint128_t)0x2 * (x27 * x37)) + ((uint128_t)x29 * x35))))))))))))) + (0x11 * (((uint128_t)0x2 * (x31 * x62)) + (((uint128_t)x33 * x63) + ((uint128_t)0x2 * (x32 * x61))))));
+ { uint128_t x68 = ((((uint128_t)x5 * x57) + (((uint128_t)x7 * x55) + (((uint128_t)x9 * x53) + (((uint128_t)x11 * x51) + (((uint128_t)x13 * x49) + (((uint128_t)x15 * x47) + (((uint128_t)x17 * x45) + (((uint128_t)x19 * x43) + (((uint128_t)x21 * x41) + (((uint128_t)x23 * x39) + (((uint128_t)x25 * x37) + ((uint128_t)x27 * x35)))))))))))) + (0x11 * (((uint128_t)x29 * x62) + (((uint128_t)x31 * x63) + (((uint128_t)x33 * x61) + ((uint128_t)x32 * x59))))));
+ { uint128_t x69 = ((((uint128_t)x5 * x55) + (((uint128_t)0x2 * (x7 * x53)) + (((uint128_t)x9 * x51) + (((uint128_t)0x2 * (x11 * x49)) + (((uint128_t)x13 * x47) + (((uint128_t)0x2 * (x15 * x45)) + (((uint128_t)x17 * x43) + (((uint128_t)0x2 * (x19 * x41)) + (((uint128_t)x21 * x39) + (((uint128_t)0x2 * (x23 * x37)) + ((uint128_t)x25 * x35))))))))))) + (0x11 * (((uint128_t)0x2 * (x27 * x62)) + (((uint128_t)x29 * x63) + (((uint128_t)0x2 * (x31 * x61)) + (((uint128_t)x33 * x59) + ((uint128_t)0x2 * (x32 * x57))))))));
+ { uint128_t x70 = ((((uint128_t)x5 * x53) + (((uint128_t)x7 * x51) + (((uint128_t)x9 * x49) + (((uint128_t)x11 * x47) + (((uint128_t)x13 * x45) + (((uint128_t)x15 * x43) + (((uint128_t)x17 * x41) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x23 * x35)))))))))) + (0x11 * (((uint128_t)x25 * x62) + (((uint128_t)x27 * x63) + (((uint128_t)x29 * x61) + (((uint128_t)x31 * x59) + (((uint128_t)x33 * x57) + ((uint128_t)x32 * x55))))))));
+ { uint128_t x71 = ((((uint128_t)x5 * x51) + (((uint128_t)0x2 * (x7 * x49)) + (((uint128_t)x9 * x47) + (((uint128_t)0x2 * (x11 * x45)) + (((uint128_t)x13 * x43) + (((uint128_t)0x2 * (x15 * x41)) + (((uint128_t)x17 * x39) + (((uint128_t)0x2 * (x19 * x37)) + ((uint128_t)x21 * x35))))))))) + (0x11 * (((uint128_t)0x2 * (x23 * x62)) + (((uint128_t)x25 * x63) + (((uint128_t)0x2 * (x27 * x61)) + (((uint128_t)x29 * x59) + (((uint128_t)0x2 * (x31 * x57)) + (((uint128_t)x33 * x55) + ((uint128_t)0x2 * (x32 * x53))))))))));
+ { uint128_t x72 = ((((uint128_t)x5 * x49) + (((uint128_t)x7 * x47) + (((uint128_t)x9 * x45) + (((uint128_t)x11 * x43) + (((uint128_t)x13 * x41) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + ((uint128_t)x19 * x35)))))))) + (0x11 * (((uint128_t)x21 * x62) + (((uint128_t)x23 * x63) + (((uint128_t)x25 * x61) + (((uint128_t)x27 * x59) + (((uint128_t)x29 * x57) + (((uint128_t)x31 * x55) + (((uint128_t)x33 * x53) + ((uint128_t)x32 * x51))))))))));
+ { uint128_t x73 = ((((uint128_t)x5 * x47) + (((uint128_t)0x2 * (x7 * x45)) + (((uint128_t)x9 * x43) + (((uint128_t)0x2 * (x11 * x41)) + (((uint128_t)x13 * x39) + (((uint128_t)0x2 * (x15 * x37)) + ((uint128_t)x17 * x35))))))) + (0x11 * (((uint128_t)0x2 * (x19 * x62)) + (((uint128_t)x21 * x63) + (((uint128_t)0x2 * (x23 * x61)) + (((uint128_t)x25 * x59) + (((uint128_t)0x2 * (x27 * x57)) + (((uint128_t)x29 * x55) + (((uint128_t)0x2 * (x31 * x53)) + (((uint128_t)x33 * x51) + ((uint128_t)0x2 * (x32 * x49))))))))))));
+ { uint128_t x74 = ((((uint128_t)x5 * x45) + (((uint128_t)x7 * x43) + (((uint128_t)x9 * x41) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + ((uint128_t)x15 * x35)))))) + (0x11 * (((uint128_t)x17 * x62) + (((uint128_t)x19 * x63) + (((uint128_t)x21 * x61) + (((uint128_t)x23 * x59) + (((uint128_t)x25 * x57) + (((uint128_t)x27 * x55) + (((uint128_t)x29 * x53) + (((uint128_t)x31 * x51) + (((uint128_t)x33 * x49) + ((uint128_t)x32 * x47))))))))))));
+ { uint128_t x75 = ((((uint128_t)x5 * x43) + (((uint128_t)0x2 * (x7 * x41)) + (((uint128_t)x9 * x39) + (((uint128_t)0x2 * (x11 * x37)) + ((uint128_t)x13 * x35))))) + (0x11 * (((uint128_t)0x2 * (x15 * x62)) + (((uint128_t)x17 * x63) + (((uint128_t)0x2 * (x19 * x61)) + (((uint128_t)x21 * x59) + (((uint128_t)0x2 * (x23 * x57)) + (((uint128_t)x25 * x55) + (((uint128_t)0x2 * (x27 * x53)) + (((uint128_t)x29 * x51) + (((uint128_t)0x2 * (x31 * x49)) + (((uint128_t)x33 * x47) + ((uint128_t)0x2 * (x32 * x45))))))))))))));
+ { uint128_t x76 = ((((uint128_t)x5 * x41) + (((uint128_t)x7 * x39) + (((uint128_t)x9 * x37) + ((uint128_t)x11 * x35)))) + (0x11 * (((uint128_t)x13 * x62) + (((uint128_t)x15 * x63) + (((uint128_t)x17 * x61) + (((uint128_t)x19 * x59) + (((uint128_t)x21 * x57) + (((uint128_t)x23 * x55) + (((uint128_t)x25 * x53) + (((uint128_t)x27 * x51) + (((uint128_t)x29 * x49) + (((uint128_t)x31 * x47) + (((uint128_t)x33 * x45) + ((uint128_t)x32 * x43))))))))))))));
+ { uint128_t x77 = ((((uint128_t)x5 * x39) + (((uint128_t)0x2 * (x7 * x37)) + ((uint128_t)x9 * x35))) + (0x11 * (((uint128_t)0x2 * (x11 * x62)) + (((uint128_t)x13 * x63) + (((uint128_t)0x2 * (x15 * x61)) + (((uint128_t)x17 * x59) + (((uint128_t)0x2 * (x19 * x57)) + (((uint128_t)x21 * x55) + (((uint128_t)0x2 * (x23 * x53)) + (((uint128_t)x25 * x51) + (((uint128_t)0x2 * (x27 * x49)) + (((uint128_t)x29 * x47) + (((uint128_t)0x2 * (x31 * x45)) + (((uint128_t)x33 * x43) + ((uint128_t)0x2 * (x32 * x41))))))))))))))));
+ { uint128_t x78 = ((((uint128_t)x5 * x37) + ((uint128_t)x7 * x35)) + (0x11 * (((uint128_t)x9 * x62) + (((uint128_t)x11 * x63) + (((uint128_t)x13 * x61) + (((uint128_t)x15 * x59) + (((uint128_t)x17 * x57) + (((uint128_t)x19 * x55) + (((uint128_t)x21 * x53) + (((uint128_t)x23 * x51) + (((uint128_t)x25 * x49) + (((uint128_t)x27 * x47) + (((uint128_t)x29 * x45) + (((uint128_t)x31 * x43) + (((uint128_t)x33 * x41) + ((uint128_t)x32 * x39))))))))))))))));
+ { uint128_t x79 = (((uint128_t)x5 * x35) + (0x11 * (((uint128_t)0x2 * (x7 * x62)) + (((uint128_t)x9 * x63) + (((uint128_t)0x2 * (x11 * x61)) + (((uint128_t)x13 * x59) + (((uint128_t)0x2 * (x15 * x57)) + (((uint128_t)x17 * x55) + (((uint128_t)0x2 * (x19 * x53)) + (((uint128_t)x21 * x51) + (((uint128_t)0x2 * (x23 * x49)) + (((uint128_t)x25 * x47) + (((uint128_t)0x2 * (x27 * x45)) + (((uint128_t)x29 * x43) + (((uint128_t)0x2 * (x31 * x41)) + (((uint128_t)x33 * x39) + ((uint128_t)0x2 * (x32 * x37))))))))))))))))));
+ { uint64_t x80 = (uint64_t) (x79 >> 0x1f);
+ { uint64_t x81 = ((uint64_t)x79 & 0x7fffffff);
+ { uint128_t x82 = (x80 + x78);
+ { uint64_t x83 = (uint64_t) (x82 >> 0x1e);
+ { uint64_t x84 = ((uint64_t)x82 & 0x3fffffff);
+ { uint128_t x85 = (x83 + x77);
+ { uint64_t x86 = (uint64_t) (x85 >> 0x1f);
+ { uint64_t x87 = ((uint64_t)x85 & 0x7fffffff);
+ { uint128_t x88 = (x86 + x76);
+ { uint64_t x89 = (uint64_t) (x88 >> 0x1e);
+ { uint64_t x90 = ((uint64_t)x88 & 0x3fffffff);
+ { uint128_t x91 = (x89 + x75);
+ { uint64_t x92 = (uint64_t) (x91 >> 0x1f);
+ { uint64_t x93 = ((uint64_t)x91 & 0x7fffffff);
+ { uint128_t x94 = (x92 + x74);
+ { uint64_t x95 = (uint64_t) (x94 >> 0x1e);
+ { uint64_t x96 = ((uint64_t)x94 & 0x3fffffff);
+ { uint128_t x97 = (x95 + x73);
+ { uint64_t x98 = (uint64_t) (x97 >> 0x1f);
+ { uint64_t x99 = ((uint64_t)x97 & 0x7fffffff);
+ { uint128_t x100 = (x98 + x72);
+ { uint64_t x101 = (uint64_t) (x100 >> 0x1e);
+ { uint64_t x102 = ((uint64_t)x100 & 0x3fffffff);
+ { uint128_t x103 = (x101 + x71);
+ { uint64_t x104 = (uint64_t) (x103 >> 0x1f);
+ { uint64_t x105 = ((uint64_t)x103 & 0x7fffffff);
+ { uint128_t x106 = (x104 + x70);
+ { uint64_t x107 = (uint64_t) (x106 >> 0x1e);
+ { uint64_t x108 = ((uint64_t)x106 & 0x3fffffff);
+ { uint128_t x109 = (x107 + x69);
+ { uint64_t x110 = (uint64_t) (x109 >> 0x1f);
+ { uint64_t x111 = ((uint64_t)x109 & 0x7fffffff);
+ { uint128_t x112 = (x110 + x68);
+ { uint64_t x113 = (uint64_t) (x112 >> 0x1e);
+ { uint64_t x114 = ((uint64_t)x112 & 0x3fffffff);
+ { uint128_t x115 = (x113 + x67);
+ { uint64_t x116 = (uint64_t) (x115 >> 0x1f);
+ { uint64_t x117 = ((uint64_t)x115 & 0x7fffffff);
+ { uint128_t x118 = (x116 + x66);
+ { uint64_t x119 = (uint64_t) (x118 >> 0x1e);
+ { uint64_t x120 = ((uint64_t)x118 & 0x3fffffff);
+ { uint128_t x121 = (x119 + x65);
+ { uint64_t x122 = (uint64_t) (x121 >> 0x1f);
+ { uint64_t x123 = ((uint64_t)x121 & 0x7fffffff);
+ { uint128_t x124 = (x122 + x64);
+ { uint64_t x125 = (uint64_t) (x124 >> 0x1e);
+ { uint64_t x126 = ((uint64_t)x124 & 0x3fffffff);
+ { uint64_t x127 = (x81 + (0x11 * x125));
+ { uint64_t x128 = (x127 >> 0x1f);
+ { uint64_t x129 = (x127 & 0x7fffffff);
+ { uint64_t x130 = (x128 + x84);
+ { uint64_t x131 = (x130 >> 0x1e);
+ { uint64_t x132 = (x130 & 0x3fffffff);
+ out[0] = x129;
+ out[1] = x132;
+ out[2] = (x131 + x87);
+ out[3] = x90;
+ out[4] = x93;
+ out[5] = x96;
+ out[6] = x99;
+ out[7] = x102;
+ out[8] = x105;
+ out[9] = x108;
+ out[10] = x111;
+ out[11] = x114;
+ out[12] = x117;
+ out[13] = x120;
+ out[14] = x123;
+ out[15] = x126;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e488m17/fesquare.c b/src/Specific/solinas64_2e488m17/fesquare.c
index ff95eaf88..d05416f90 100644
--- a/src/Specific/solinas64_2e488m17/fesquare.c
+++ b/src/Specific/solinas64_2e488m17/fesquare.c
@@ -1,106 +1,104 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x31 = (((uint128_t)x2 * x29) + (((uint128_t)x4 * x30) + (((uint128_t)x6 * x28) + (((uint128_t)x8 * x26) + (((uint128_t)x10 * x24) + (((uint128_t)x12 * x22) + (((uint128_t)x14 * x20) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + (((uint128_t)x20 * x14) + (((uint128_t)x22 * x12) + (((uint128_t)x24 * x10) + (((uint128_t)x26 * x8) + (((uint128_t)x28 * x6) + (((uint128_t)x30 * x4) + ((uint128_t)x29 * x2))))))))))))))));
-{ uint128_t x32 = ((((uint128_t)x2 * x30) + (((uint128_t)0x2 * (x4 * x28)) + (((uint128_t)x6 * x26) + (((uint128_t)0x2 * (x8 * x24)) + (((uint128_t)x10 * x22) + (((uint128_t)0x2 * (x12 * x20)) + (((uint128_t)x14 * x18) + (((uint128_t)0x2 * (x16 * x16)) + (((uint128_t)x18 * x14) + (((uint128_t)0x2 * (x20 * x12)) + (((uint128_t)x22 * x10) + (((uint128_t)0x2 * (x24 * x8)) + (((uint128_t)x26 * x6) + (((uint128_t)0x2 * (x28 * x4)) + ((uint128_t)x30 * x2))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x29 * x29))));
-{ uint128_t x33 = ((((uint128_t)x2 * x28) + (((uint128_t)x4 * x26) + (((uint128_t)x6 * x24) + (((uint128_t)x8 * x22) + (((uint128_t)x10 * x20) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + (((uint128_t)x20 * x10) + (((uint128_t)x22 * x8) + (((uint128_t)x24 * x6) + (((uint128_t)x26 * x4) + ((uint128_t)x28 * x2)))))))))))))) + (0x11 * (((uint128_t)x30 * x29) + ((uint128_t)x29 * x30))));
-{ uint128_t x34 = ((((uint128_t)x2 * x26) + (((uint128_t)0x2 * (x4 * x24)) + (((uint128_t)x6 * x22) + (((uint128_t)0x2 * (x8 * x20)) + (((uint128_t)x10 * x18) + (((uint128_t)0x2 * (x12 * x16)) + (((uint128_t)x14 * x14) + (((uint128_t)0x2 * (x16 * x12)) + (((uint128_t)x18 * x10) + (((uint128_t)0x2 * (x20 * x8)) + (((uint128_t)x22 * x6) + (((uint128_t)0x2 * (x24 * x4)) + ((uint128_t)x26 * x2))))))))))))) + (0x11 * (((uint128_t)0x2 * (x28 * x29)) + (((uint128_t)x30 * x30) + ((uint128_t)0x2 * (x29 * x28))))));
-{ uint128_t x35 = ((((uint128_t)x2 * x24) + (((uint128_t)x4 * x22) + (((uint128_t)x6 * x20) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + (((uint128_t)x20 * x6) + (((uint128_t)x22 * x4) + ((uint128_t)x24 * x2)))))))))))) + (0x11 * (((uint128_t)x26 * x29) + (((uint128_t)x28 * x30) + (((uint128_t)x30 * x28) + ((uint128_t)x29 * x26))))));
-{ uint128_t x36 = ((((uint128_t)x2 * x22) + (((uint128_t)0x2 * (x4 * x20)) + (((uint128_t)x6 * x18) + (((uint128_t)0x2 * (x8 * x16)) + (((uint128_t)x10 * x14) + (((uint128_t)0x2 * (x12 * x12)) + (((uint128_t)x14 * x10) + (((uint128_t)0x2 * (x16 * x8)) + (((uint128_t)x18 * x6) + (((uint128_t)0x2 * (x20 * x4)) + ((uint128_t)x22 * x2))))))))))) + (0x11 * (((uint128_t)0x2 * (x24 * x29)) + (((uint128_t)x26 * x30) + (((uint128_t)0x2 * (x28 * x28)) + (((uint128_t)x30 * x26) + ((uint128_t)0x2 * (x29 * x24))))))));
-{ uint128_t x37 = ((((uint128_t)x2 * x20) + (((uint128_t)x4 * x18) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + (((uint128_t)x18 * x4) + ((uint128_t)x20 * x2)))))))))) + (0x11 * (((uint128_t)x22 * x29) + (((uint128_t)x24 * x30) + (((uint128_t)x26 * x28) + (((uint128_t)x28 * x26) + (((uint128_t)x30 * x24) + ((uint128_t)x29 * x22))))))));
-{ uint128_t x38 = ((((uint128_t)x2 * x18) + (((uint128_t)0x2 * (x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)0x2 * (x8 * x12)) + (((uint128_t)x10 * x10) + (((uint128_t)0x2 * (x12 * x8)) + (((uint128_t)x14 * x6) + (((uint128_t)0x2 * (x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x11 * (((uint128_t)0x2 * (x20 * x29)) + (((uint128_t)x22 * x30) + (((uint128_t)0x2 * (x24 * x28)) + (((uint128_t)x26 * x26) + (((uint128_t)0x2 * (x28 * x24)) + (((uint128_t)x30 * x22) + ((uint128_t)0x2 * (x29 * x20))))))))));
-{ uint128_t x39 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * (((uint128_t)x18 * x29) + (((uint128_t)x20 * x30) + (((uint128_t)x22 * x28) + (((uint128_t)x24 * x26) + (((uint128_t)x26 * x24) + (((uint128_t)x28 * x22) + (((uint128_t)x30 * x20) + ((uint128_t)x29 * x18))))))))));
-{ uint128_t x40 = ((((uint128_t)x2 * x14) + (((uint128_t)0x2 * (x4 * x12)) + (((uint128_t)x6 * x10) + (((uint128_t)0x2 * (x8 * x8)) + (((uint128_t)x10 * x6) + (((uint128_t)0x2 * (x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)0x2 * (x16 * x29)) + (((uint128_t)x18 * x30) + (((uint128_t)0x2 * (x20 * x28)) + (((uint128_t)x22 * x26) + (((uint128_t)0x2 * (x24 * x24)) + (((uint128_t)x26 * x22) + (((uint128_t)0x2 * (x28 * x20)) + (((uint128_t)x30 * x18) + ((uint128_t)0x2 * (x29 * x16))))))))))));
-{ uint128_t x41 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x29) + (((uint128_t)x16 * x30) + (((uint128_t)x18 * x28) + (((uint128_t)x20 * x26) + (((uint128_t)x22 * x24) + (((uint128_t)x24 * x22) + (((uint128_t)x26 * x20) + (((uint128_t)x28 * x18) + (((uint128_t)x30 * x16) + ((uint128_t)x29 * x14))))))))))));
-{ uint128_t x42 = ((((uint128_t)x2 * x10) + (((uint128_t)0x2 * (x4 * x8)) + (((uint128_t)x6 * x6) + (((uint128_t)0x2 * (x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)0x2 * (x12 * x29)) + (((uint128_t)x14 * x30) + (((uint128_t)0x2 * (x16 * x28)) + (((uint128_t)x18 * x26) + (((uint128_t)0x2 * (x20 * x24)) + (((uint128_t)x22 * x22) + (((uint128_t)0x2 * (x24 * x20)) + (((uint128_t)x26 * x18) + (((uint128_t)0x2 * (x28 * x16)) + (((uint128_t)x30 * x14) + ((uint128_t)0x2 * (x29 * x12))))))))))))));
-{ uint128_t x43 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x29) + (((uint128_t)x12 * x30) + (((uint128_t)x14 * x28) + (((uint128_t)x16 * x26) + (((uint128_t)x18 * x24) + (((uint128_t)x20 * x22) + (((uint128_t)x22 * x20) + (((uint128_t)x24 * x18) + (((uint128_t)x26 * x16) + (((uint128_t)x28 * x14) + (((uint128_t)x30 * x12) + ((uint128_t)x29 * x10))))))))))))));
-{ uint128_t x44 = ((((uint128_t)x2 * x6) + (((uint128_t)0x2 * (x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)0x2 * (x8 * x29)) + (((uint128_t)x10 * x30) + (((uint128_t)0x2 * (x12 * x28)) + (((uint128_t)x14 * x26) + (((uint128_t)0x2 * (x16 * x24)) + (((uint128_t)x18 * x22) + (((uint128_t)0x2 * (x20 * x20)) + (((uint128_t)x22 * x18) + (((uint128_t)0x2 * (x24 * x16)) + (((uint128_t)x26 * x14) + (((uint128_t)0x2 * (x28 * x12)) + (((uint128_t)x30 * x10) + ((uint128_t)0x2 * (x29 * x8))))))))))))))));
-{ uint128_t x45 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x29) + (((uint128_t)x8 * x30) + (((uint128_t)x10 * x28) + (((uint128_t)x12 * x26) + (((uint128_t)x14 * x24) + (((uint128_t)x16 * x22) + (((uint128_t)x18 * x20) + (((uint128_t)x20 * x18) + (((uint128_t)x22 * x16) + (((uint128_t)x24 * x14) + (((uint128_t)x26 * x12) + (((uint128_t)x28 * x10) + (((uint128_t)x30 * x8) + ((uint128_t)x29 * x6))))))))))))))));
-{ uint128_t x46 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)0x2 * (x4 * x29)) + (((uint128_t)x6 * x30) + (((uint128_t)0x2 * (x8 * x28)) + (((uint128_t)x10 * x26) + (((uint128_t)0x2 * (x12 * x24)) + (((uint128_t)x14 * x22) + (((uint128_t)0x2 * (x16 * x20)) + (((uint128_t)x18 * x18) + (((uint128_t)0x2 * (x20 * x16)) + (((uint128_t)x22 * x14) + (((uint128_t)0x2 * (x24 * x12)) + (((uint128_t)x26 * x10) + (((uint128_t)0x2 * (x28 * x8)) + (((uint128_t)x30 * x6) + ((uint128_t)0x2 * (x29 * x4))))))))))))))))));
-{ uint64_t x47 = (uint64_t) (x46 >> 0x1f);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7fffffff);
-{ uint128_t x49 = (x47 + x45);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x1e);
-{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffff);
-{ uint128_t x52 = (x50 + x44);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x1f);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7fffffff);
-{ uint128_t x55 = (x53 + x43);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x1e);
-{ uint64_t x57 = ((uint64_t)x55 & 0x3fffffff);
-{ uint128_t x58 = (x56 + x42);
-{ uint64_t x59 = (uint64_t) (x58 >> 0x1f);
-{ uint64_t x60 = ((uint64_t)x58 & 0x7fffffff);
-{ uint128_t x61 = (x59 + x41);
-{ uint64_t x62 = (uint64_t) (x61 >> 0x1e);
-{ uint64_t x63 = ((uint64_t)x61 & 0x3fffffff);
-{ uint128_t x64 = (x62 + x40);
-{ uint64_t x65 = (uint64_t) (x64 >> 0x1f);
-{ uint64_t x66 = ((uint64_t)x64 & 0x7fffffff);
-{ uint128_t x67 = (x65 + x39);
-{ uint64_t x68 = (uint64_t) (x67 >> 0x1e);
-{ uint64_t x69 = ((uint64_t)x67 & 0x3fffffff);
-{ uint128_t x70 = (x68 + x38);
-{ uint64_t x71 = (uint64_t) (x70 >> 0x1f);
-{ uint64_t x72 = ((uint64_t)x70 & 0x7fffffff);
-{ uint128_t x73 = (x71 + x37);
-{ uint64_t x74 = (uint64_t) (x73 >> 0x1e);
-{ uint64_t x75 = ((uint64_t)x73 & 0x3fffffff);
-{ uint128_t x76 = (x74 + x36);
-{ uint64_t x77 = (uint64_t) (x76 >> 0x1f);
-{ uint64_t x78 = ((uint64_t)x76 & 0x7fffffff);
-{ uint128_t x79 = (x77 + x35);
-{ uint64_t x80 = (uint64_t) (x79 >> 0x1e);
-{ uint64_t x81 = ((uint64_t)x79 & 0x3fffffff);
-{ uint128_t x82 = (x80 + x34);
-{ uint64_t x83 = (uint64_t) (x82 >> 0x1f);
-{ uint64_t x84 = ((uint64_t)x82 & 0x7fffffff);
-{ uint128_t x85 = (x83 + x33);
-{ uint64_t x86 = (uint64_t) (x85 >> 0x1e);
-{ uint64_t x87 = ((uint64_t)x85 & 0x3fffffff);
-{ uint128_t x88 = (x86 + x32);
-{ uint64_t x89 = (uint64_t) (x88 >> 0x1f);
-{ uint64_t x90 = ((uint64_t)x88 & 0x7fffffff);
-{ uint128_t x91 = (x89 + x31);
-{ uint64_t x92 = (uint64_t) (x91 >> 0x1e);
-{ uint64_t x93 = ((uint64_t)x91 & 0x3fffffff);
-{ uint64_t x94 = (x48 + (0x11 * x92));
-{ uint64_t x95 = (x94 >> 0x1f);
-{ uint64_t x96 = (x94 & 0x7fffffff);
-{ uint64_t x97 = (x95 + x51);
-{ uint64_t x98 = (x97 >> 0x1e);
-{ uint64_t x99 = (x97 & 0x3fffffff);
-out[0] = x93;
-out[1] = x90;
-out[2] = x87;
-out[3] = x84;
-out[4] = x81;
-out[5] = x78;
-out[6] = x75;
-out[7] = x72;
-out[8] = x69;
-out[9] = x66;
-out[10] = x63;
-out[11] = x60;
-out[12] = x57;
-out[13] = x98 + x54;
-out[14] = x99;
-out[15] = x96;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[16];
+static void fesquare(uint64_t out[16], const uint64_t in1[16]) {
+ { const uint64_t x29 = in1[15];
+ { const uint64_t x30 = in1[14];
+ { const uint64_t x28 = in1[13];
+ { const uint64_t x26 = in1[12];
+ { const uint64_t x24 = in1[11];
+ { const uint64_t x22 = in1[10];
+ { const uint64_t x20 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x31 = (((uint128_t)x2 * x29) + (((uint128_t)x4 * x30) + (((uint128_t)x6 * x28) + (((uint128_t)x8 * x26) + (((uint128_t)x10 * x24) + (((uint128_t)x12 * x22) + (((uint128_t)x14 * x20) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + (((uint128_t)x20 * x14) + (((uint128_t)x22 * x12) + (((uint128_t)x24 * x10) + (((uint128_t)x26 * x8) + (((uint128_t)x28 * x6) + (((uint128_t)x30 * x4) + ((uint128_t)x29 * x2))))))))))))))));
+ { uint128_t x32 = ((((uint128_t)x2 * x30) + (((uint128_t)0x2 * (x4 * x28)) + (((uint128_t)x6 * x26) + (((uint128_t)0x2 * (x8 * x24)) + (((uint128_t)x10 * x22) + (((uint128_t)0x2 * (x12 * x20)) + (((uint128_t)x14 * x18) + (((uint128_t)0x2 * (x16 * x16)) + (((uint128_t)x18 * x14) + (((uint128_t)0x2 * (x20 * x12)) + (((uint128_t)x22 * x10) + (((uint128_t)0x2 * (x24 * x8)) + (((uint128_t)x26 * x6) + (((uint128_t)0x2 * (x28 * x4)) + ((uint128_t)x30 * x2))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x29 * x29))));
+ { uint128_t x33 = ((((uint128_t)x2 * x28) + (((uint128_t)x4 * x26) + (((uint128_t)x6 * x24) + (((uint128_t)x8 * x22) + (((uint128_t)x10 * x20) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + (((uint128_t)x20 * x10) + (((uint128_t)x22 * x8) + (((uint128_t)x24 * x6) + (((uint128_t)x26 * x4) + ((uint128_t)x28 * x2)))))))))))))) + (0x11 * (((uint128_t)x30 * x29) + ((uint128_t)x29 * x30))));
+ { uint128_t x34 = ((((uint128_t)x2 * x26) + (((uint128_t)0x2 * (x4 * x24)) + (((uint128_t)x6 * x22) + (((uint128_t)0x2 * (x8 * x20)) + (((uint128_t)x10 * x18) + (((uint128_t)0x2 * (x12 * x16)) + (((uint128_t)x14 * x14) + (((uint128_t)0x2 * (x16 * x12)) + (((uint128_t)x18 * x10) + (((uint128_t)0x2 * (x20 * x8)) + (((uint128_t)x22 * x6) + (((uint128_t)0x2 * (x24 * x4)) + ((uint128_t)x26 * x2))))))))))))) + (0x11 * (((uint128_t)0x2 * (x28 * x29)) + (((uint128_t)x30 * x30) + ((uint128_t)0x2 * (x29 * x28))))));
+ { uint128_t x35 = ((((uint128_t)x2 * x24) + (((uint128_t)x4 * x22) + (((uint128_t)x6 * x20) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + (((uint128_t)x20 * x6) + (((uint128_t)x22 * x4) + ((uint128_t)x24 * x2)))))))))))) + (0x11 * (((uint128_t)x26 * x29) + (((uint128_t)x28 * x30) + (((uint128_t)x30 * x28) + ((uint128_t)x29 * x26))))));
+ { uint128_t x36 = ((((uint128_t)x2 * x22) + (((uint128_t)0x2 * (x4 * x20)) + (((uint128_t)x6 * x18) + (((uint128_t)0x2 * (x8 * x16)) + (((uint128_t)x10 * x14) + (((uint128_t)0x2 * (x12 * x12)) + (((uint128_t)x14 * x10) + (((uint128_t)0x2 * (x16 * x8)) + (((uint128_t)x18 * x6) + (((uint128_t)0x2 * (x20 * x4)) + ((uint128_t)x22 * x2))))))))))) + (0x11 * (((uint128_t)0x2 * (x24 * x29)) + (((uint128_t)x26 * x30) + (((uint128_t)0x2 * (x28 * x28)) + (((uint128_t)x30 * x26) + ((uint128_t)0x2 * (x29 * x24))))))));
+ { uint128_t x37 = ((((uint128_t)x2 * x20) + (((uint128_t)x4 * x18) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + (((uint128_t)x18 * x4) + ((uint128_t)x20 * x2)))))))))) + (0x11 * (((uint128_t)x22 * x29) + (((uint128_t)x24 * x30) + (((uint128_t)x26 * x28) + (((uint128_t)x28 * x26) + (((uint128_t)x30 * x24) + ((uint128_t)x29 * x22))))))));
+ { uint128_t x38 = ((((uint128_t)x2 * x18) + (((uint128_t)0x2 * (x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)0x2 * (x8 * x12)) + (((uint128_t)x10 * x10) + (((uint128_t)0x2 * (x12 * x8)) + (((uint128_t)x14 * x6) + (((uint128_t)0x2 * (x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x11 * (((uint128_t)0x2 * (x20 * x29)) + (((uint128_t)x22 * x30) + (((uint128_t)0x2 * (x24 * x28)) + (((uint128_t)x26 * x26) + (((uint128_t)0x2 * (x28 * x24)) + (((uint128_t)x30 * x22) + ((uint128_t)0x2 * (x29 * x20))))))))));
+ { uint128_t x39 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * (((uint128_t)x18 * x29) + (((uint128_t)x20 * x30) + (((uint128_t)x22 * x28) + (((uint128_t)x24 * x26) + (((uint128_t)x26 * x24) + (((uint128_t)x28 * x22) + (((uint128_t)x30 * x20) + ((uint128_t)x29 * x18))))))))));
+ { uint128_t x40 = ((((uint128_t)x2 * x14) + (((uint128_t)0x2 * (x4 * x12)) + (((uint128_t)x6 * x10) + (((uint128_t)0x2 * (x8 * x8)) + (((uint128_t)x10 * x6) + (((uint128_t)0x2 * (x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)0x2 * (x16 * x29)) + (((uint128_t)x18 * x30) + (((uint128_t)0x2 * (x20 * x28)) + (((uint128_t)x22 * x26) + (((uint128_t)0x2 * (x24 * x24)) + (((uint128_t)x26 * x22) + (((uint128_t)0x2 * (x28 * x20)) + (((uint128_t)x30 * x18) + ((uint128_t)0x2 * (x29 * x16))))))))))));
+ { uint128_t x41 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x29) + (((uint128_t)x16 * x30) + (((uint128_t)x18 * x28) + (((uint128_t)x20 * x26) + (((uint128_t)x22 * x24) + (((uint128_t)x24 * x22) + (((uint128_t)x26 * x20) + (((uint128_t)x28 * x18) + (((uint128_t)x30 * x16) + ((uint128_t)x29 * x14))))))))))));
+ { uint128_t x42 = ((((uint128_t)x2 * x10) + (((uint128_t)0x2 * (x4 * x8)) + (((uint128_t)x6 * x6) + (((uint128_t)0x2 * (x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)0x2 * (x12 * x29)) + (((uint128_t)x14 * x30) + (((uint128_t)0x2 * (x16 * x28)) + (((uint128_t)x18 * x26) + (((uint128_t)0x2 * (x20 * x24)) + (((uint128_t)x22 * x22) + (((uint128_t)0x2 * (x24 * x20)) + (((uint128_t)x26 * x18) + (((uint128_t)0x2 * (x28 * x16)) + (((uint128_t)x30 * x14) + ((uint128_t)0x2 * (x29 * x12))))))))))))));
+ { uint128_t x43 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x29) + (((uint128_t)x12 * x30) + (((uint128_t)x14 * x28) + (((uint128_t)x16 * x26) + (((uint128_t)x18 * x24) + (((uint128_t)x20 * x22) + (((uint128_t)x22 * x20) + (((uint128_t)x24 * x18) + (((uint128_t)x26 * x16) + (((uint128_t)x28 * x14) + (((uint128_t)x30 * x12) + ((uint128_t)x29 * x10))))))))))))));
+ { uint128_t x44 = ((((uint128_t)x2 * x6) + (((uint128_t)0x2 * (x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)0x2 * (x8 * x29)) + (((uint128_t)x10 * x30) + (((uint128_t)0x2 * (x12 * x28)) + (((uint128_t)x14 * x26) + (((uint128_t)0x2 * (x16 * x24)) + (((uint128_t)x18 * x22) + (((uint128_t)0x2 * (x20 * x20)) + (((uint128_t)x22 * x18) + (((uint128_t)0x2 * (x24 * x16)) + (((uint128_t)x26 * x14) + (((uint128_t)0x2 * (x28 * x12)) + (((uint128_t)x30 * x10) + ((uint128_t)0x2 * (x29 * x8))))))))))))))));
+ { uint128_t x45 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x29) + (((uint128_t)x8 * x30) + (((uint128_t)x10 * x28) + (((uint128_t)x12 * x26) + (((uint128_t)x14 * x24) + (((uint128_t)x16 * x22) + (((uint128_t)x18 * x20) + (((uint128_t)x20 * x18) + (((uint128_t)x22 * x16) + (((uint128_t)x24 * x14) + (((uint128_t)x26 * x12) + (((uint128_t)x28 * x10) + (((uint128_t)x30 * x8) + ((uint128_t)x29 * x6))))))))))))))));
+ { uint128_t x46 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)0x2 * (x4 * x29)) + (((uint128_t)x6 * x30) + (((uint128_t)0x2 * (x8 * x28)) + (((uint128_t)x10 * x26) + (((uint128_t)0x2 * (x12 * x24)) + (((uint128_t)x14 * x22) + (((uint128_t)0x2 * (x16 * x20)) + (((uint128_t)x18 * x18) + (((uint128_t)0x2 * (x20 * x16)) + (((uint128_t)x22 * x14) + (((uint128_t)0x2 * (x24 * x12)) + (((uint128_t)x26 * x10) + (((uint128_t)0x2 * (x28 * x8)) + (((uint128_t)x30 * x6) + ((uint128_t)0x2 * (x29 * x4))))))))))))))))));
+ { uint64_t x47 = (uint64_t) (x46 >> 0x1f);
+ { uint64_t x48 = ((uint64_t)x46 & 0x7fffffff);
+ { uint128_t x49 = (x47 + x45);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x1e);
+ { uint64_t x51 = ((uint64_t)x49 & 0x3fffffff);
+ { uint128_t x52 = (x50 + x44);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x1f);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7fffffff);
+ { uint128_t x55 = (x53 + x43);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x1e);
+ { uint64_t x57 = ((uint64_t)x55 & 0x3fffffff);
+ { uint128_t x58 = (x56 + x42);
+ { uint64_t x59 = (uint64_t) (x58 >> 0x1f);
+ { uint64_t x60 = ((uint64_t)x58 & 0x7fffffff);
+ { uint128_t x61 = (x59 + x41);
+ { uint64_t x62 = (uint64_t) (x61 >> 0x1e);
+ { uint64_t x63 = ((uint64_t)x61 & 0x3fffffff);
+ { uint128_t x64 = (x62 + x40);
+ { uint64_t x65 = (uint64_t) (x64 >> 0x1f);
+ { uint64_t x66 = ((uint64_t)x64 & 0x7fffffff);
+ { uint128_t x67 = (x65 + x39);
+ { uint64_t x68 = (uint64_t) (x67 >> 0x1e);
+ { uint64_t x69 = ((uint64_t)x67 & 0x3fffffff);
+ { uint128_t x70 = (x68 + x38);
+ { uint64_t x71 = (uint64_t) (x70 >> 0x1f);
+ { uint64_t x72 = ((uint64_t)x70 & 0x7fffffff);
+ { uint128_t x73 = (x71 + x37);
+ { uint64_t x74 = (uint64_t) (x73 >> 0x1e);
+ { uint64_t x75 = ((uint64_t)x73 & 0x3fffffff);
+ { uint128_t x76 = (x74 + x36);
+ { uint64_t x77 = (uint64_t) (x76 >> 0x1f);
+ { uint64_t x78 = ((uint64_t)x76 & 0x7fffffff);
+ { uint128_t x79 = (x77 + x35);
+ { uint64_t x80 = (uint64_t) (x79 >> 0x1e);
+ { uint64_t x81 = ((uint64_t)x79 & 0x3fffffff);
+ { uint128_t x82 = (x80 + x34);
+ { uint64_t x83 = (uint64_t) (x82 >> 0x1f);
+ { uint64_t x84 = ((uint64_t)x82 & 0x7fffffff);
+ { uint128_t x85 = (x83 + x33);
+ { uint64_t x86 = (uint64_t) (x85 >> 0x1e);
+ { uint64_t x87 = ((uint64_t)x85 & 0x3fffffff);
+ { uint128_t x88 = (x86 + x32);
+ { uint64_t x89 = (uint64_t) (x88 >> 0x1f);
+ { uint64_t x90 = ((uint64_t)x88 & 0x7fffffff);
+ { uint128_t x91 = (x89 + x31);
+ { uint64_t x92 = (uint64_t) (x91 >> 0x1e);
+ { uint64_t x93 = ((uint64_t)x91 & 0x3fffffff);
+ { uint64_t x94 = (x48 + (0x11 * x92));
+ { uint64_t x95 = (x94 >> 0x1f);
+ { uint64_t x96 = (x94 & 0x7fffffff);
+ { uint64_t x97 = (x95 + x51);
+ { uint64_t x98 = (x97 >> 0x1e);
+ { uint64_t x99 = (x97 & 0x3fffffff);
+ out[0] = x96;
+ out[1] = x99;
+ out[2] = (x98 + x54);
+ out[3] = x57;
+ out[4] = x60;
+ out[5] = x63;
+ out[6] = x66;
+ out[7] = x69;
+ out[8] = x72;
+ out[9] = x75;
+ out[10] = x78;
+ out[11] = x81;
+ out[12] = x84;
+ out[13] = x87;
+ out[14] = x90;
+ out[15] = x93;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e488m17/freeze.c b/src/Specific/solinas64_2e488m17/freeze.c
index 15e32ebf7..c12930031 100644
--- a/src/Specific/solinas64_2e488m17/freeze.c
+++ b/src/Specific/solinas64_2e488m17/freeze.c
@@ -1,25 +1,84 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x32;
-out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 31 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = Const 2147483631;;
+static void freeze(uint64_t out[16], const uint64_t in1[16]) {
+ { const uint64_t x29 = in1[15];
+ { const uint64_t x30 = in1[14];
+ { const uint64_t x28 = in1[13];
+ { const uint64_t x26 = in1[12];
+ { const uint64_t x24 = in1[11];
+ { const uint64_t x22 = in1[10];
+ { const uint64_t x20 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffef);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x4, 0x3fffffff);
+ { uint64_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x6, 0x7fffffff);
+ { uint64_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x39, Return x8, 0x3fffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x10, 0x7fffffff);
+ { uint64_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x12, 0x3fffffff);
+ { uint64_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x48, Return x14, 0x7fffffff);
+ { uint64_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x16, 0x3fffffff);
+ { uint64_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x54, Return x18, 0x7fffffff);
+ { uint64_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x57, Return x20, 0x3fffffff);
+ { uint64_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x60, Return x22, 0x7fffffff);
+ { uint64_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x24, 0x3fffffff);
+ { uint64_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x66, Return x26, 0x7fffffff);
+ { uint64_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x69, Return x28, 0x3fffffff);
+ { uint64_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x72, Return x30, 0x7fffffff);
+ { uint64_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x75, Return x29, 0x3fffffff);
+ { uint64_t x79 = (uint64_t)cmovznz(x78, 0x0, 0xffffffffffffffffL);
+ { uint64_t x80 = (x79 & 0x7fffffef);
+ { uint64_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ { uint64_t x84 = (x79 & 0x3fffffff);
+ { uint64_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ { uint64_t x88 = (x79 & 0x7fffffff);
+ { uint64_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ { uint64_t x92 = (x79 & 0x3fffffff);
+ { uint64_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ { uint64_t x96 = (x79 & 0x7fffffff);
+ { uint64_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ { uint64_t x100 = (x79 & 0x3fffffff);
+ { uint64_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ { uint64_t x104 = (x79 & 0x7fffffff);
+ { uint64_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ { uint64_t x108 = (x79 & 0x3fffffff);
+ { uint64_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ { uint64_t x112 = (x79 & 0x7fffffff);
+ { uint64_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ { uint64_t x116 = (x79 & 0x3fffffff);
+ { uint64_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ { uint64_t x120 = (x79 & 0x7fffffff);
+ { uint64_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ { uint64_t x124 = (x79 & 0x3fffffff);
+ { uint64_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ { uint64_t x128 = (x79 & 0x7fffffff);
+ { uint64_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ { uint64_t x132 = (x79 & 0x3fffffff);
+ { uint64_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ { uint64_t x136 = (x79 & 0x7fffffff);
+ { uint64_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ { uint64_t x140 = (x79 & 0x3fffffff);
+ { uint64_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ out[0] = x82;
+ out[1] = x86;
+ out[2] = x90;
+ out[3] = x94;
+ out[4] = x98;
+ out[5] = x102;
+ out[6] = x106;
+ out[7] = x110;
+ out[8] = x114;
+ out[9] = x118;
+ out[10] = x122;
+ out[11] = x126;
+ out[12] = x130;
+ out[13] = x134;
+ out[14] = x138;
+ out[15] = x142;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e489m21/femul.c b/src/Specific/solinas64_2e489m21/femul.c
index 411c9ef12..28f7e7869 100644
--- a/src/Specific/solinas64_2e489m21/femul.c
+++ b/src/Specific/solinas64_2e489m21/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint128_t x36 = (((uint128_t)x5 * x34) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + ((0x2 * ((uint128_t)x13 * x29)) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((0x2 * ((uint128_t)x19 * x23)) + ((uint128_t)x18 * x21)))))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x15 * ((uint128_t)x18 * x34)));
-{ uint128_t x38 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + (((uint128_t)x11 * x27) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((uint128_t)x17 * x21))))))) + (0x15 * ((0x2 * ((uint128_t)x19 * x34)) + (0x2 * ((uint128_t)x18 * x35)))));
-{ uint128_t x39 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + ((0x2 * ((uint128_t)x13 * x23)) + ((uint128_t)x15 * x21)))))) + (0x15 * (((uint128_t)x17 * x34) + ((0x2 * ((uint128_t)x19 * x35)) + ((uint128_t)x18 * x33)))));
-{ uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x15 * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((uint128_t)x11 * x21)))) + (0x15 * ((0x2 * ((uint128_t)x13 * x34)) + ((0x2 * ((uint128_t)x15 * x35)) + (((uint128_t)x17 * x33) + ((0x2 * ((uint128_t)x19 * x31)) + (0x2 * ((uint128_t)x18 * x29))))))));
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((uint128_t)x9 * x21))) + (0x15 * (((uint128_t)x11 * x34) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + ((0x2 * ((uint128_t)x19 * x29)) + ((uint128_t)x18 * x27))))))));
-{ uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x15 * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
-{ uint128_t x44 = (((uint128_t)x5 * x21) + (0x15 * ((0x2 * ((uint128_t)x7 * x34)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + ((0x2 * ((uint128_t)x19 * x25)) + (0x2 * ((uint128_t)x18 * x23)))))))))));
-{ uint128_t x45 = (x44 >> 0x37);
-{ uint64_t x46 = ((uint64_t)x44 & 0x7fffffffffffff);
-{ uint128_t x47 = (x45 + x43);
-{ uint128_t x48 = (x47 >> 0x36);
-{ uint64_t x49 = ((uint64_t)x47 & 0x3fffffffffffff);
-{ uint128_t x50 = (x48 + x42);
-{ uint128_t x51 = (x50 >> 0x36);
-{ uint64_t x52 = ((uint64_t)x50 & 0x3fffffffffffff);
-{ uint128_t x53 = (x51 + x41);
-{ uint128_t x54 = (x53 >> 0x37);
-{ uint64_t x55 = ((uint64_t)x53 & 0x7fffffffffffff);
-{ uint128_t x56 = (x54 + x40);
-{ uint128_t x57 = (x56 >> 0x36);
-{ uint64_t x58 = ((uint64_t)x56 & 0x3fffffffffffff);
-{ uint128_t x59 = (x57 + x39);
-{ uint128_t x60 = (x59 >> 0x36);
-{ uint64_t x61 = ((uint64_t)x59 & 0x3fffffffffffff);
-{ uint128_t x62 = (x60 + x38);
-{ uint64_t x63 = (uint64_t) (x62 >> 0x37);
-{ uint64_t x64 = ((uint64_t)x62 & 0x7fffffffffffff);
-{ uint128_t x65 = (x63 + x37);
-{ uint64_t x66 = (uint64_t) (x65 >> 0x36);
-{ uint64_t x67 = ((uint64_t)x65 & 0x3fffffffffffff);
-{ uint128_t x68 = (x66 + x36);
-{ uint64_t x69 = (uint64_t) (x68 >> 0x36);
-{ uint64_t x70 = ((uint64_t)x68 & 0x3fffffffffffff);
-{ uint128_t x71 = (x46 + ((uint128_t)0x15 * x69));
-{ uint64_t x72 = (uint64_t) (x71 >> 0x37);
-{ uint64_t x73 = ((uint64_t)x71 & 0x7fffffffffffff);
-{ uint64_t x74 = (x72 + x49);
-{ uint64_t x75 = (x74 >> 0x36);
-{ uint64_t x76 = (x74 & 0x3fffffffffffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint64_t out[9], const uint64_t in1[9], const uint64_t in2[9]) {
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x34 = in2[8];
+ { const uint64_t x35 = in2[7];
+ { const uint64_t x33 = in2[6];
+ { const uint64_t x31 = in2[5];
+ { const uint64_t x29 = in2[4];
+ { const uint64_t x27 = in2[3];
+ { const uint64_t x25 = in2[2];
+ { const uint64_t x23 = in2[1];
+ { const uint64_t x21 = in2[0];
+ { uint128_t x36 = (((uint128_t)x5 * x34) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + ((0x2 * ((uint128_t)x13 * x29)) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((0x2 * ((uint128_t)x19 * x23)) + ((uint128_t)x18 * x21)))))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x15 * ((uint128_t)x18 * x34)));
+ { uint128_t x38 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + (((uint128_t)x11 * x27) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((uint128_t)x17 * x21))))))) + (0x15 * ((0x2 * ((uint128_t)x19 * x34)) + (0x2 * ((uint128_t)x18 * x35)))));
+ { uint128_t x39 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + ((0x2 * ((uint128_t)x13 * x23)) + ((uint128_t)x15 * x21)))))) + (0x15 * (((uint128_t)x17 * x34) + ((0x2 * ((uint128_t)x19 * x35)) + ((uint128_t)x18 * x33)))));
+ { uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x15 * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((uint128_t)x11 * x21)))) + (0x15 * ((0x2 * ((uint128_t)x13 * x34)) + ((0x2 * ((uint128_t)x15 * x35)) + (((uint128_t)x17 * x33) + ((0x2 * ((uint128_t)x19 * x31)) + (0x2 * ((uint128_t)x18 * x29))))))));
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((uint128_t)x9 * x21))) + (0x15 * (((uint128_t)x11 * x34) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + ((0x2 * ((uint128_t)x19 * x29)) + ((uint128_t)x18 * x27))))))));
+ { uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x15 * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
+ { uint128_t x44 = (((uint128_t)x5 * x21) + (0x15 * ((0x2 * ((uint128_t)x7 * x34)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + ((0x2 * ((uint128_t)x19 * x25)) + (0x2 * ((uint128_t)x18 * x23)))))))))));
+ { uint128_t x45 = (x44 >> 0x37);
+ { uint64_t x46 = ((uint64_t)x44 & 0x7fffffffffffff);
+ { uint128_t x47 = (x45 + x43);
+ { uint128_t x48 = (x47 >> 0x36);
+ { uint64_t x49 = ((uint64_t)x47 & 0x3fffffffffffff);
+ { uint128_t x50 = (x48 + x42);
+ { uint128_t x51 = (x50 >> 0x36);
+ { uint64_t x52 = ((uint64_t)x50 & 0x3fffffffffffff);
+ { uint128_t x53 = (x51 + x41);
+ { uint128_t x54 = (x53 >> 0x37);
+ { uint64_t x55 = ((uint64_t)x53 & 0x7fffffffffffff);
+ { uint128_t x56 = (x54 + x40);
+ { uint128_t x57 = (x56 >> 0x36);
+ { uint64_t x58 = ((uint64_t)x56 & 0x3fffffffffffff);
+ { uint128_t x59 = (x57 + x39);
+ { uint128_t x60 = (x59 >> 0x36);
+ { uint64_t x61 = ((uint64_t)x59 & 0x3fffffffffffff);
+ { uint128_t x62 = (x60 + x38);
+ { uint64_t x63 = (uint64_t) (x62 >> 0x37);
+ { uint64_t x64 = ((uint64_t)x62 & 0x7fffffffffffff);
+ { uint128_t x65 = (x63 + x37);
+ { uint64_t x66 = (uint64_t) (x65 >> 0x36);
+ { uint64_t x67 = ((uint64_t)x65 & 0x3fffffffffffff);
+ { uint128_t x68 = (x66 + x36);
+ { uint64_t x69 = (uint64_t) (x68 >> 0x36);
+ { uint64_t x70 = ((uint64_t)x68 & 0x3fffffffffffff);
+ { uint128_t x71 = (x46 + ((uint128_t)0x15 * x69));
+ { uint64_t x72 = (uint64_t) (x71 >> 0x37);
+ { uint64_t x73 = ((uint64_t)x71 & 0x7fffffffffffff);
+ { uint64_t x74 = (x72 + x49);
+ { uint64_t x75 = (x74 >> 0x36);
+ { uint64_t x76 = (x74 & 0x3fffffffffffff);
+ out[0] = x73;
+ out[1] = x76;
+ out[2] = (x75 + x52);
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e489m21/fesquare.c b/src/Specific/solinas64_2e489m21/fesquare.c
index 841b6c705..2da18a58f 100644
--- a/src/Specific/solinas64_2e489m21/fesquare.c
+++ b/src/Specific/solinas64_2e489m21/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x17 = (((uint128_t)x2 * x15) + ((0x2 * ((uint128_t)x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + ((0x2 * ((uint128_t)x10 * x10)) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x15 * x2)))))))));
-{ uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x15 * ((uint128_t)x15 * x15)));
-{ uint128_t x19 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + (((uint128_t)x8 * x8) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x15 * ((0x2 * ((uint128_t)x16 * x15)) + (0x2 * ((uint128_t)x15 * x16)))));
-{ uint128_t x20 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x15 * (((uint128_t)x14 * x15) + ((0x2 * ((uint128_t)x16 * x16)) + ((uint128_t)x15 * x14)))));
-{ uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x15 * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
-{ uint128_t x22 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x15 * ((0x2 * ((uint128_t)x10 * x15)) + ((0x2 * ((uint128_t)x12 * x16)) + (((uint128_t)x14 * x14) + ((0x2 * ((uint128_t)x16 * x12)) + (0x2 * ((uint128_t)x15 * x10))))))));
-{ uint128_t x23 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x15 * (((uint128_t)x8 * x15) + ((0x2 * ((uint128_t)x10 * x16)) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((0x2 * ((uint128_t)x16 * x10)) + ((uint128_t)x15 * x8))))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x15 * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
-{ uint128_t x25 = (((uint128_t)x2 * x2) + (0x15 * ((0x2 * ((uint128_t)x4 * x15)) + ((0x2 * ((uint128_t)x6 * x16)) + (((uint128_t)x8 * x14) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + (((uint128_t)x14 * x8) + ((0x2 * ((uint128_t)x16 * x6)) + (0x2 * ((uint128_t)x15 * x4)))))))))));
-{ uint128_t x26 = (x25 >> 0x37);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
-{ uint128_t x28 = (x26 + x24);
-{ uint128_t x29 = (x28 >> 0x36);
-{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
-{ uint128_t x31 = (x29 + x23);
-{ uint128_t x32 = (x31 >> 0x36);
-{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffffff);
-{ uint128_t x34 = (x32 + x22);
-{ uint128_t x35 = (x34 >> 0x37);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
-{ uint128_t x37 = (x35 + x21);
-{ uint128_t x38 = (x37 >> 0x36);
-{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
-{ uint128_t x40 = (x38 + x20);
-{ uint128_t x41 = (x40 >> 0x36);
-{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffffff);
-{ uint128_t x43 = (x41 + x19);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x37);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
-{ uint128_t x46 = (x44 + x18);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x36);
-{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffffff);
-{ uint128_t x49 = (x47 + x17);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x36);
-{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffffff);
-{ uint128_t x52 = (x27 + ((uint128_t)0x15 * x50));
-{ uint64_t x53 = (uint64_t) (x52 >> 0x37);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7fffffffffffff);
-{ uint64_t x55 = (x53 + x30);
-{ uint64_t x56 = (x55 >> 0x36);
-{ uint64_t x57 = (x55 & 0x3fffffffffffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint64_t out[9], const uint64_t in1[9]) {
+ { const uint64_t x15 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x17 = (((uint128_t)x2 * x15) + ((0x2 * ((uint128_t)x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + ((0x2 * ((uint128_t)x10 * x10)) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x15 * x2)))))))));
+ { uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x15 * ((uint128_t)x15 * x15)));
+ { uint128_t x19 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + (((uint128_t)x8 * x8) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x15 * ((0x2 * ((uint128_t)x16 * x15)) + (0x2 * ((uint128_t)x15 * x16)))));
+ { uint128_t x20 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x15 * (((uint128_t)x14 * x15) + ((0x2 * ((uint128_t)x16 * x16)) + ((uint128_t)x15 * x14)))));
+ { uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x15 * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
+ { uint128_t x22 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x15 * ((0x2 * ((uint128_t)x10 * x15)) + ((0x2 * ((uint128_t)x12 * x16)) + (((uint128_t)x14 * x14) + ((0x2 * ((uint128_t)x16 * x12)) + (0x2 * ((uint128_t)x15 * x10))))))));
+ { uint128_t x23 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x15 * (((uint128_t)x8 * x15) + ((0x2 * ((uint128_t)x10 * x16)) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((0x2 * ((uint128_t)x16 * x10)) + ((uint128_t)x15 * x8))))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x15 * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
+ { uint128_t x25 = (((uint128_t)x2 * x2) + (0x15 * ((0x2 * ((uint128_t)x4 * x15)) + ((0x2 * ((uint128_t)x6 * x16)) + (((uint128_t)x8 * x14) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + (((uint128_t)x14 * x8) + ((0x2 * ((uint128_t)x16 * x6)) + (0x2 * ((uint128_t)x15 * x4)))))))))));
+ { uint128_t x26 = (x25 >> 0x37);
+ { uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+ { uint128_t x28 = (x26 + x24);
+ { uint128_t x29 = (x28 >> 0x36);
+ { uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
+ { uint128_t x31 = (x29 + x23);
+ { uint128_t x32 = (x31 >> 0x36);
+ { uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffffff);
+ { uint128_t x34 = (x32 + x22);
+ { uint128_t x35 = (x34 >> 0x37);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
+ { uint128_t x37 = (x35 + x21);
+ { uint128_t x38 = (x37 >> 0x36);
+ { uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
+ { uint128_t x40 = (x38 + x20);
+ { uint128_t x41 = (x40 >> 0x36);
+ { uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffffff);
+ { uint128_t x43 = (x41 + x19);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x37);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
+ { uint128_t x46 = (x44 + x18);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x36);
+ { uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffffff);
+ { uint128_t x49 = (x47 + x17);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x36);
+ { uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffffff);
+ { uint128_t x52 = (x27 + ((uint128_t)0x15 * x50));
+ { uint64_t x53 = (uint64_t) (x52 >> 0x37);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7fffffffffffff);
+ { uint64_t x55 = (x53 + x30);
+ { uint64_t x56 = (x55 >> 0x36);
+ { uint64_t x57 = (x55 & 0x3fffffffffffff);
+ out[0] = x54;
+ out[1] = x57;
+ out[2] = (x56 + x33);
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e489m21/freeze.c b/src/Specific/solinas64_2e489m21/freeze.c
index 61dceee8a..0b5c3eb3c 100644
--- a/src/Specific/solinas64_2e489m21/freeze.c
+++ b/src/Specific/solinas64_2e489m21/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffffffffeb;;
+static void freeze(uint64_t out[9], const uint64_t in1[9]) {
+ { const uint64_t x15 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffffffffeb);
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x4, 0x3fffffffffffff);
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x6, 0x3fffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x8, 0x7fffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x10, 0x3fffffffffffff);
+ { uint64_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x12, 0x3fffffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x34, Return x14, 0x7fffffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x16, 0x3fffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x15, 0x3fffffffffffff);
+ { uint64_t x44 = (uint64_t)cmovznz(x43, 0x0, 0xffffffffffffffffL);
+ { uint64_t x45 = (x44 & 0x7fffffffffffeb);
+ { uint64_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x18, Return x45);
+ { uint64_t x49 = (x44 & 0x3fffffffffffff);
+ { uint64_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint64_t x53 = (x44 & 0x3fffffffffffff);
+ { uint64_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint64_t x57 = (x44 & 0x7fffffffffffff);
+ { uint64_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint64_t x61 = (x44 & 0x3fffffffffffff);
+ { uint64_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint64_t x65 = (x44 & 0x3fffffffffffff);
+ { uint64_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint64_t x69 = (x44 & 0x7fffffffffffff);
+ { uint64_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint64_t x73 = (x44 & 0x3fffffffffffff);
+ { uint64_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint64_t x77 = (x44 & 0x3fffffffffffff);
+ { uint64_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 54 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x76, Return x42, Return x77);
+ out[0] = x47;
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e495m31/femul.c b/src/Specific/solinas64_2e495m31/femul.c
index b950c4c96..2f2ec67e9 100644
--- a/src/Specific/solinas64_2e495m31/femul.c
+++ b/src/Specific/solinas64_2e495m31/femul.c
@@ -1,71 +1,71 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
-{ uint128_t x36 = (((uint128_t)x5 * x34) + (((uint128_t)x7 * x35) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + (((uint128_t)x19 * x23) + ((uint128_t)x18 * x21)))))))));
-{ uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x1f * ((uint128_t)x18 * x34)));
-{ uint128_t x38 = ((((uint128_t)x5 * x33) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x17 * x21))))))) + (0x1f * (((uint128_t)x19 * x34) + ((uint128_t)x18 * x35))));
-{ uint128_t x39 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + ((uint128_t)x15 * x21)))))) + (0x1f * (((uint128_t)x17 * x34) + (((uint128_t)x19 * x35) + ((uint128_t)x18 * x33)))));
-{ uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x1f * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21)))) + (0x1f * (((uint128_t)x13 * x34) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + ((uint128_t)x18 * x29)))))));
-{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + ((uint128_t)x9 * x21))) + (0x1f * (((uint128_t)x11 * x34) + (((uint128_t)x13 * x35) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + (((uint128_t)x19 * x29) + ((uint128_t)x18 * x27))))))));
-{ uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x1f * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
-{ uint128_t x44 = (((uint128_t)x5 * x21) + (0x1f * (((uint128_t)x7 * x34) + (((uint128_t)x9 * x35) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + (((uint128_t)x19 * x25) + ((uint128_t)x18 * x23))))))))));
-{ uint128_t x45 = (x44 >> 0x37);
-{ uint64_t x46 = ((uint64_t)x44 & 0x7fffffffffffff);
-{ uint128_t x47 = (x45 + x43);
-{ uint128_t x48 = (x47 >> 0x37);
-{ uint64_t x49 = ((uint64_t)x47 & 0x7fffffffffffff);
-{ uint128_t x50 = (x48 + x42);
-{ uint128_t x51 = (x50 >> 0x37);
-{ uint64_t x52 = ((uint64_t)x50 & 0x7fffffffffffff);
-{ uint128_t x53 = (x51 + x41);
-{ uint128_t x54 = (x53 >> 0x37);
-{ uint64_t x55 = ((uint64_t)x53 & 0x7fffffffffffff);
-{ uint128_t x56 = (x54 + x40);
-{ uint128_t x57 = (x56 >> 0x37);
-{ uint64_t x58 = ((uint64_t)x56 & 0x7fffffffffffff);
-{ uint128_t x59 = (x57 + x39);
-{ uint128_t x60 = (x59 >> 0x37);
-{ uint64_t x61 = ((uint64_t)x59 & 0x7fffffffffffff);
-{ uint128_t x62 = (x60 + x38);
-{ uint128_t x63 = (x62 >> 0x37);
-{ uint64_t x64 = ((uint64_t)x62 & 0x7fffffffffffff);
-{ uint128_t x65 = (x63 + x37);
-{ uint64_t x66 = (uint64_t) (x65 >> 0x37);
-{ uint64_t x67 = ((uint64_t)x65 & 0x7fffffffffffff);
-{ uint128_t x68 = (x66 + x36);
-{ uint64_t x69 = (uint64_t) (x68 >> 0x37);
-{ uint64_t x70 = ((uint64_t)x68 & 0x7fffffffffffff);
-{ uint128_t x71 = (x46 + ((uint128_t)0x1f * x69));
-{ uint64_t x72 = (uint64_t) (x71 >> 0x37);
-{ uint64_t x73 = ((uint64_t)x71 & 0x7fffffffffffff);
-{ uint64_t x74 = (x72 + x49);
-{ uint64_t x75 = (x74 >> 0x37);
-{ uint64_t x76 = (x74 & 0x7fffffffffffff);
-out[0] = x70;
-out[1] = x67;
-out[2] = x64;
-out[3] = x61;
-out[4] = x58;
-out[5] = x55;
-out[6] = x75 + x52;
-out[7] = x76;
-out[8] = x73;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void femul(uint64_t out[9], const uint64_t in1[9], const uint64_t in2[9]) {
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x34 = in2[8];
+ { const uint64_t x35 = in2[7];
+ { const uint64_t x33 = in2[6];
+ { const uint64_t x31 = in2[5];
+ { const uint64_t x29 = in2[4];
+ { const uint64_t x27 = in2[3];
+ { const uint64_t x25 = in2[2];
+ { const uint64_t x23 = in2[1];
+ { const uint64_t x21 = in2[0];
+ { uint128_t x36 = (((uint128_t)x5 * x34) + (((uint128_t)x7 * x35) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + (((uint128_t)x19 * x23) + ((uint128_t)x18 * x21)))))))));
+ { uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x1f * ((uint128_t)x18 * x34)));
+ { uint128_t x38 = ((((uint128_t)x5 * x33) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x17 * x21))))))) + (0x1f * (((uint128_t)x19 * x34) + ((uint128_t)x18 * x35))));
+ { uint128_t x39 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + ((uint128_t)x15 * x21)))))) + (0x1f * (((uint128_t)x17 * x34) + (((uint128_t)x19 * x35) + ((uint128_t)x18 * x33)))));
+ { uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x1f * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21)))) + (0x1f * (((uint128_t)x13 * x34) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + ((uint128_t)x18 * x29)))))));
+ { uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + ((uint128_t)x9 * x21))) + (0x1f * (((uint128_t)x11 * x34) + (((uint128_t)x13 * x35) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + (((uint128_t)x19 * x29) + ((uint128_t)x18 * x27))))))));
+ { uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x1f * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
+ { uint128_t x44 = (((uint128_t)x5 * x21) + (0x1f * (((uint128_t)x7 * x34) + (((uint128_t)x9 * x35) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + (((uint128_t)x19 * x25) + ((uint128_t)x18 * x23))))))))));
+ { uint128_t x45 = (x44 >> 0x37);
+ { uint64_t x46 = ((uint64_t)x44 & 0x7fffffffffffff);
+ { uint128_t x47 = (x45 + x43);
+ { uint128_t x48 = (x47 >> 0x37);
+ { uint64_t x49 = ((uint64_t)x47 & 0x7fffffffffffff);
+ { uint128_t x50 = (x48 + x42);
+ { uint128_t x51 = (x50 >> 0x37);
+ { uint64_t x52 = ((uint64_t)x50 & 0x7fffffffffffff);
+ { uint128_t x53 = (x51 + x41);
+ { uint128_t x54 = (x53 >> 0x37);
+ { uint64_t x55 = ((uint64_t)x53 & 0x7fffffffffffff);
+ { uint128_t x56 = (x54 + x40);
+ { uint128_t x57 = (x56 >> 0x37);
+ { uint64_t x58 = ((uint64_t)x56 & 0x7fffffffffffff);
+ { uint128_t x59 = (x57 + x39);
+ { uint128_t x60 = (x59 >> 0x37);
+ { uint64_t x61 = ((uint64_t)x59 & 0x7fffffffffffff);
+ { uint128_t x62 = (x60 + x38);
+ { uint128_t x63 = (x62 >> 0x37);
+ { uint64_t x64 = ((uint64_t)x62 & 0x7fffffffffffff);
+ { uint128_t x65 = (x63 + x37);
+ { uint64_t x66 = (uint64_t) (x65 >> 0x37);
+ { uint64_t x67 = ((uint64_t)x65 & 0x7fffffffffffff);
+ { uint128_t x68 = (x66 + x36);
+ { uint64_t x69 = (uint64_t) (x68 >> 0x37);
+ { uint64_t x70 = ((uint64_t)x68 & 0x7fffffffffffff);
+ { uint128_t x71 = (x46 + ((uint128_t)0x1f * x69));
+ { uint64_t x72 = (uint64_t) (x71 >> 0x37);
+ { uint64_t x73 = ((uint64_t)x71 & 0x7fffffffffffff);
+ { uint64_t x74 = (x72 + x49);
+ { uint64_t x75 = (x74 >> 0x37);
+ { uint64_t x76 = (x74 & 0x7fffffffffffff);
+ out[0] = x73;
+ out[1] = x76;
+ out[2] = (x75 + x52);
+ out[3] = x55;
+ out[4] = x58;
+ out[5] = x61;
+ out[6] = x64;
+ out[7] = x67;
+ out[8] = x70;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e495m31/fesquare.c b/src/Specific/solinas64_2e495m31/fesquare.c
index dcf85abaa..3fec90126 100644
--- a/src/Specific/solinas64_2e495m31/fesquare.c
+++ b/src/Specific/solinas64_2e495m31/fesquare.c
@@ -1,71 +1,62 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x17 = (((uint128_t)x2 * x15) + (((uint128_t)x4 * x16) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + (((uint128_t)x16 * x4) + ((uint128_t)x15 * x2)))))))));
-{ uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x1f * ((uint128_t)x15 * x15)));
-{ uint128_t x19 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x1f * (((uint128_t)x16 * x15) + ((uint128_t)x15 * x16))));
-{ uint128_t x20 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x1f * (((uint128_t)x14 * x15) + (((uint128_t)x16 * x16) + ((uint128_t)x15 * x14)))));
-{ uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x1f * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
-{ uint128_t x22 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x1f * (((uint128_t)x10 * x15) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + ((uint128_t)x15 * x10)))))));
-{ uint128_t x23 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1f * (((uint128_t)x8 * x15) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + ((uint128_t)x15 * x8))))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1f * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
-{ uint128_t x25 = (((uint128_t)x2 * x2) + (0x1f * (((uint128_t)x4 * x15) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + ((uint128_t)x15 * x4))))))))));
-{ uint128_t x26 = (x25 >> 0x37);
-{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
-{ uint128_t x28 = (x26 + x24);
-{ uint128_t x29 = (x28 >> 0x37);
-{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
-{ uint128_t x31 = (x29 + x23);
-{ uint128_t x32 = (x31 >> 0x37);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7fffffffffffff);
-{ uint128_t x34 = (x32 + x22);
-{ uint128_t x35 = (x34 >> 0x37);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
-{ uint128_t x37 = (x35 + x21);
-{ uint128_t x38 = (x37 >> 0x37);
-{ uint64_t x39 = ((uint64_t)x37 & 0x7fffffffffffff);
-{ uint128_t x40 = (x38 + x20);
-{ uint128_t x41 = (x40 >> 0x37);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7fffffffffffff);
-{ uint128_t x43 = (x41 + x19);
-{ uint128_t x44 = (x43 >> 0x37);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
-{ uint128_t x46 = (x44 + x18);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x37);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7fffffffffffff);
-{ uint128_t x49 = (x47 + x17);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x37);
-{ uint64_t x51 = ((uint64_t)x49 & 0x7fffffffffffff);
-{ uint128_t x52 = (x27 + ((uint128_t)0x1f * x50));
-{ uint64_t x53 = (uint64_t) (x52 >> 0x37);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7fffffffffffff);
-{ uint64_t x55 = (x53 + x30);
-{ uint64_t x56 = (x55 >> 0x37);
-{ uint64_t x57 = (x55 & 0x7fffffffffffff);
-out[0] = x51;
-out[1] = x48;
-out[2] = x45;
-out[3] = x42;
-out[4] = x39;
-out[5] = x36;
-out[6] = x56 + x33;
-out[7] = x57;
-out[8] = x54;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[9];
+static void fesquare(uint64_t out[9], const uint64_t in1[9]) { /* Reads in1[0..8] and writes out[0..8]; every product below multiplies two limbs of in1. NOTE(review): uint128_t is not declared in the lines shown - presumably supplied by whatever includes this file; confirm. */
+ { const uint64_t x15 = in1[8]; /* x15, x16, x14, x12, x10, x8, x6, x4, x2 hold in1[8] down to in1[0]. */
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x17 = (((uint128_t)x2 * x15) + (((uint128_t)x4 * x16) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + (((uint128_t)x16 * x4) + ((uint128_t)x15 * x2))))))))); /* x17: 128-bit sum of the products in1[i]*in1[j] with i + j == 8. */
+ { uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x1f * ((uint128_t)x15 * x15))); /* x18..x24: columns i + j == 7 down to 1, each plus 0x1f times the products whose indices sum to 9 more. */
+ { uint128_t x19 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x1f * (((uint128_t)x16 * x15) + ((uint128_t)x15 * x16))));
+ { uint128_t x20 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x1f * (((uint128_t)x14 * x15) + (((uint128_t)x16 * x16) + ((uint128_t)x15 * x14)))));
+ { uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x1f * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
+ { uint128_t x22 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x1f * (((uint128_t)x10 * x15) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + ((uint128_t)x15 * x10)))))));
+ { uint128_t x23 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1f * (((uint128_t)x8 * x15) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + ((uint128_t)x15 * x8))))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1f * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
+ { uint128_t x25 = (((uint128_t)x2 * x2) + (0x1f * (((uint128_t)x4 * x15) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + ((uint128_t)x15 * x4)))))))))); /* x25: in1[0]*in1[0] plus 0x1f times the products with i + j == 9. */
+ { uint128_t x26 = (x25 >> 0x37); /* Carry chain: each column is split at bit 55 (0x37) and the high part is added into the next column. */
+ { uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff); /* Low 55 bits of column 0. */
+ { uint128_t x28 = (x26 + x24);
+ { uint128_t x29 = (x28 >> 0x37);
+ { uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
+ { uint128_t x31 = (x29 + x23);
+ { uint128_t x32 = (x31 >> 0x37);
+ { uint64_t x33 = ((uint64_t)x31 & 0x7fffffffffffff);
+ { uint128_t x34 = (x32 + x22);
+ { uint128_t x35 = (x34 >> 0x37);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
+ { uint128_t x37 = (x35 + x21);
+ { uint128_t x38 = (x37 >> 0x37);
+ { uint64_t x39 = ((uint64_t)x37 & 0x7fffffffffffff);
+ { uint128_t x40 = (x38 + x20);
+ { uint128_t x41 = (x40 >> 0x37);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7fffffffffffff);
+ { uint128_t x43 = (x41 + x19);
+ { uint128_t x44 = (x43 >> 0x37);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
+ { uint128_t x46 = (x44 + x18);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x37); /* From here on the carries are narrowed to 64 bits. */
+ { uint64_t x48 = ((uint64_t)x46 & 0x7fffffffffffff);
+ { uint128_t x49 = (x47 + x17);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x37);
+ { uint64_t x51 = ((uint64_t)x49 & 0x7fffffffffffff);
+ { uint128_t x52 = (x27 + ((uint128_t)0x1f * x50)); /* The carry out of the top column (x50) re-enters column 0 multiplied by 0x1f. */
+ { uint64_t x53 = (uint64_t) (x52 >> 0x37);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7fffffffffffff);
+ { uint64_t x55 = (x53 + x30); /* Second pass: the carry from limb 0 goes into limb 1. */
+ { uint64_t x56 = (x55 >> 0x37);
+ { uint64_t x57 = (x55 & 0x7fffffffffffff);
+ out[0] = x54; /* out[k] is taken from the column with i + j == k. */
+ out[1] = x57;
+ out[2] = (x56 + x33); /* The carry x56 is added with no further masking. */
+ out[3] = x36;
+ out[4] = x39;
+ out[5] = x42;
+ out[6] = x45;
+ out[7] = x48;
+ out[8] = x51;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e495m31/freeze.c b/src/Specific/solinas64_2e495m31/freeze.c
index 2825b688d..ea6896c00 100644
--- a/src/Specific/solinas64_2e495m31/freeze.c
+++ b/src/Specific/solinas64_2e495m31/freeze.c
@@ -1,25 +1,49 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x18;
-out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x7fffffffffffe1;;
+static void freeze(uint64_t out[9], const uint64_t in1[9]) { /* Reads in1[0..8] and writes out[0..8]. NOTE(review): the "Op (Syntax....)" statements below are not C and will not compile; they look like generator terms printed unexpanded - confirm and regenerate. */
+ { const uint64_t x15 = in1[8]; /* x15, x16, x14, x12, x10, x8, x6, x4, x2 hold in1[8] down to in1[0]. */
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x18, uint8_t x19 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x7fffffffffffe1); /* Presumably x18 = x2 - 0x7fffffffffffe1 (2^55 - 31) as a 55-bit limb with borrow-in 0x0, and x19 = borrow-out - TODO confirm operand order. */
+ { uint64_t x21, uint8_t x22 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x19, Return x4, 0x7fffffffffffff); /* Same operator on each remaining limb, passing the previous borrow along; the constant is 0x7fffffffffffff (2^55 - 1) for all of them. */
+ { uint64_t x24, uint8_t x25 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x22, Return x6, 0x7fffffffffffff);
+ { uint64_t x27, uint8_t x28 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x25, Return x8, 0x7fffffffffffff);
+ { uint64_t x30, uint8_t x31 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x28, Return x10, 0x7fffffffffffff);
+ { uint64_t x33, uint8_t x34 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x31, Return x12, 0x7fffffffffffff);
+ { uint64_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x34, Return x14, 0x7fffffffffffff);
+ { uint64_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x37, Return x16, 0x7fffffffffffff);
+ { uint64_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x40, Return x15, 0x7fffffffffffff);
+ { uint64_t x44 = (uint64_t)cmovznz(x43, 0x0, 0xffffffffffffffffL); /* x44 is 0x0 or all ones, selected on the last borrow x43 by cmovznz (defined elsewhere; presumably all ones when x43 is nonzero - confirm). */
+ { uint64_t x45 = (x44 & 0x7fffffffffffe1); /* x45, x49, ..., x77 are the constants used in the subtraction above, ANDed with the mask x44. */
+ { uint64_t x47, uint8_t x48 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x18, Return x45); /* Presumably x47 = x18 + x45 as a 55-bit limb and x48 = carry-out; the following AddWithGetCarry lines pass the carry upward - TODO confirm. */
+ { uint64_t x49 = (x44 & 0x7fffffffffffff);
+ { uint64_t x51, uint8_t x52 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x48, Return x21, Return x49);
+ { uint64_t x53 = (x44 & 0x7fffffffffffff);
+ { uint64_t x55, uint8_t x56 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x52, Return x24, Return x53);
+ { uint64_t x57 = (x44 & 0x7fffffffffffff);
+ { uint64_t x59, uint8_t x60 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x56, Return x27, Return x57);
+ { uint64_t x61 = (x44 & 0x7fffffffffffff);
+ { uint64_t x63, uint8_t x64 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x60, Return x30, Return x61);
+ { uint64_t x65 = (x44 & 0x7fffffffffffff);
+ { uint64_t x67, uint8_t x68 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x64, Return x33, Return x65);
+ { uint64_t x69 = (x44 & 0x7fffffffffffff);
+ { uint64_t x71, uint8_t x72 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x68, Return x36, Return x69);
+ { uint64_t x73 = (x44 & 0x7fffffffffffff);
+ { uint64_t x75, uint8_t x76 = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x72, Return x39, Return x73);
+ { uint64_t x77 = (x44 & 0x7fffffffffffff);
+ { uint64_t x79, uint8_t _ = Op (Syntax.AddWithGetCarry 55 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x76, Return x42, Return x77); /* The final carry is bound to "_" and never read. */
+ out[0] = x47; /* out[k] receives the add-back result for the limb that came from in1[k]. */
+ out[1] = x51;
+ out[2] = x55;
+ out[3] = x59;
+ out[4] = x63;
+ out[5] = x67;
+ out[6] = x71;
+ out[7] = x75;
+ out[8] = x79;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e510m290x2e496m1/freeze.c b/src/Specific/solinas64_2e510m290x2e496m1/freeze.c
index d36b70a17..68338738f 100644
--- a/src/Specific/solinas64_2e510m290x2e496m1/freeze.c
+++ b/src/Specific/solinas64_2e510m290x2e496m1/freeze.c
@@ -1,62 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint64_t x20; uint8_t x21 = _subborrow_u51(0x0, x2, 0x7ffffffffffff, &x20);
-{ uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23);
-{ uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
-{ uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
-{ uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
-{ uint64_t x35; uint8_t x36 = _subborrow_u51(x33, x12, 0x7ffffffffffff, &x35);
-{ uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
-{ uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
-{ uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
-{ uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7dbbfffffffff, &x47);
-{ uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL);
-{ uint64_t x50 = (x49 & 0x7ffffffffffff);
-{ uint64_t x52; uint8_t x53 = _addcarryx_u51(0x0, x20, x50, &x52);
-{ uint64_t x54 = (x49 & 0x7ffffffffffff);
-{ uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
-{ uint64_t x58 = (x49 & 0x7ffffffffffff);
-{ uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
-{ uint64_t x62 = (x49 & 0x7ffffffffffff);
-{ uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
-{ uint64_t x66 = (x49 & 0x7ffffffffffff);
-{ uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
-{ uint64_t x70 = (x49 & 0x7ffffffffffff);
-{ uint64_t x72; uint8_t x73 = _addcarryx_u51(x69, x35, x70, &x72);
-{ uint64_t x74 = (x49 & 0x7ffffffffffff);
-{ uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
-{ uint64_t x78 = (x49 & 0x7ffffffffffff);
-{ uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
-{ uint64_t x82 = (x49 & 0x7ffffffffffff);
-{ uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
-{ uint64_t x86 = (x49 & 0x7dbbfffffffff);
-{ uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88);
-out[0] = x88;
-out[1] = x84;
-out[2] = x80;
-out[3] = x76;
-out[4] = x72;
-out[5] = x68;
-out[6] = x64;
-out[7] = x60;
-out[8] = x56;
-out[9] = x52;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void freeze(uint64_t out[10], const uint64_t in1[10]) { /* Reads in1[0..9] and writes out[0..9]. _subborrow_u51, _addcarryx_u51 and cmovznz are not defined in the lines shown; their exact semantics should be checked at their definitions. */
+ { const uint64_t x17 = in1[9]; /* x17, x18, x16, x14, x12, x10, x8, x6, x4, x2 hold in1[9] down to in1[0]. */
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20; uint8_t x21 = _subborrow_u51(0x0, x2, 0x7ffffffffffff, &x20); /* Borrow-chained pass over the limbs from in1[0] upward; the constant operand is 0x7ffffffffffff (2^51 - 1) for in1[0..8]. Presumably x20 = x2 - constant - borrow-in and x21 = borrow-out - confirm. */
+ { uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u51(x33, x12, 0x7ffffffffffff, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
+ { uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
+ { uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
+ { uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7dbbfffffffff, &x47); /* The top limb in1[9] uses a different constant, 0x7dbbfffffffff. */
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL); /* x49 is 0x0 or all ones, selected on the last borrow x48 (presumably all ones when x48 is nonzero - confirm). */
+ { uint64_t x50 = (x49 & 0x7ffffffffffff); /* x50, x54, ..., x86 are the constants from the first pass, ANDed with the mask x49. */
+ { uint64_t x52; uint8_t x53 = _addcarryx_u51(0x0, x20, x50, &x52); /* Carry-chained pass that combines each first-pass result with its masked constant. */
+ { uint64_t x54 = (x49 & 0x7ffffffffffff);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
+ { uint64_t x58 = (x49 & 0x7ffffffffffff);
+ { uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
+ { uint64_t x62 = (x49 & 0x7ffffffffffff);
+ { uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
+ { uint64_t x66 = (x49 & 0x7ffffffffffff);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
+ { uint64_t x70 = (x49 & 0x7ffffffffffff);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u51(x69, x35, x70, &x72);
+ { uint64_t x74 = (x49 & 0x7ffffffffffff);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
+ { uint64_t x78 = (x49 & 0x7ffffffffffff);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
+ { uint64_t x82 = (x49 & 0x7ffffffffffff);
+ { uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
+ { uint64_t x86 = (x49 & 0x7dbbfffffffff);
+ { uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88); /* The final carry is bound to "_" and never read. */
+ out[0] = x52; /* out[k] receives the second-pass result for the limb that came from in1[k]. */
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e511m187/femul.c b/src/Specific/solinas64_2e511m187/femul.c
index 3aad50af3..73bb21edb 100644
--- a/src/Specific/solinas64_2e511m187/femul.c
+++ b/src/Specific/solinas64_2e511m187/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0xbb * ((uint128_t)x20 * x38)));
-{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0xbb * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
-{ uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (0xbb * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
-{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0xbb * (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35))))));
-{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0xbb * (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33)))))));
-{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0xbb * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
-{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0xbb * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
-{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0xbb * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
-{ uint128_t x49 = (((uint128_t)x5 * x23) + (0xbb * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
-{ uint128_t x50 = (x49 >> 0x34);
-{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
-{ uint128_t x52 = (x50 + x48);
-{ uint128_t x53 = (x52 >> 0x33);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
-{ uint128_t x55 = (x53 + x47);
-{ uint128_t x56 = (x55 >> 0x33);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
-{ uint128_t x58 = (x56 + x46);
-{ uint128_t x59 = (x58 >> 0x33);
-{ uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
-{ uint128_t x61 = (x59 + x45);
-{ uint128_t x62 = (x61 >> 0x33);
-{ uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
-{ uint128_t x64 = (x62 + x44);
-{ uint128_t x65 = (x64 >> 0x33);
-{ uint64_t x66 = ((uint64_t)x64 & 0x7ffffffffffff);
-{ uint128_t x67 = (x65 + x43);
-{ uint64_t x68 = (uint64_t) (x67 >> 0x33);
-{ uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
-{ uint128_t x70 = (x68 + x42);
-{ uint64_t x71 = (uint64_t) (x70 >> 0x33);
-{ uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
-{ uint128_t x73 = (x71 + x41);
-{ uint64_t x74 = (uint64_t) (x73 >> 0x33);
-{ uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
-{ uint128_t x76 = (x74 + x40);
-{ uint64_t x77 = (uint64_t) (x76 >> 0x33);
-{ uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
-{ uint128_t x79 = (x51 + ((uint128_t)0xbb * x77));
-{ uint64_t x80 = (uint64_t) (x79 >> 0x34);
-{ uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
-{ uint64_t x82 = (x80 + x54);
-{ uint64_t x83 = (x82 >> 0x33);
-{ uint64_t x84 = (x82 & 0x7ffffffffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint64_t out[10], const uint64_t in1[10], const uint64_t in2[10]) { /* Reads in1[0..9] and in2[0..9], writes out[0..9]; every product below multiplies one limb of in1 by one limb of in2. NOTE(review): uint128_t is not declared in the lines shown - presumably supplied by whatever includes this file; confirm. */
+ { const uint64_t x20 = in1[9]; /* x20, x21, x19, x17, x15, x13, x11, x9, x7, x5 hold in1[9] down to in1[0]. */
+ { const uint64_t x21 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x38 = in2[9]; /* x38, x39, x37, x35, x33, x31, x29, x27, x25, x23 hold in2[9] down to in2[0]. */
+ { const uint64_t x39 = in2[8];
+ { const uint64_t x37 = in2[7];
+ { const uint64_t x35 = in2[6];
+ { const uint64_t x33 = in2[5];
+ { const uint64_t x31 = in2[4];
+ { const uint64_t x29 = in2[3];
+ { const uint64_t x27 = in2[2];
+ { const uint64_t x25 = in2[1];
+ { const uint64_t x23 = in2[0];
+ { uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23)))))))))); /* x40: 128-bit sum of the products in1[i]*in2[j] with i + j == 9; terms where neither index is 0 carry an extra factor 0x2 (presumably compensating for limb 0 being one bit wider than the others - confirm). */
+ { uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0xbb * ((uint128_t)x20 * x38))); /* x41..x48: columns i + j == 8 down to 1, each plus 0xbb times the products whose indices sum to 10 more. */
+ { uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0xbb * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+ { uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (0xbb * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+ { uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0xbb * (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35))))));
+ { uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0xbb * (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33)))))));
+ { uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0xbb * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+ { uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0xbb * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+ { uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0xbb * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+ { uint128_t x49 = (((uint128_t)x5 * x23) + (0xbb * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25)))))))))))); /* x49: in1[0]*in2[0] plus 0xbb * 0x2 times the products with i + j == 10. */
+ { uint128_t x50 = (x49 >> 0x34); /* Carry chain: column 0 is split at bit 52 (0x34), every later column at bit 51 (0x33); the high part is added into the next column. */
+ { uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff); /* Low 52 bits of column 0. */
+ { uint128_t x52 = (x50 + x48);
+ { uint128_t x53 = (x52 >> 0x33);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+ { uint128_t x55 = (x53 + x47);
+ { uint128_t x56 = (x55 >> 0x33);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+ { uint128_t x58 = (x56 + x46);
+ { uint128_t x59 = (x58 >> 0x33);
+ { uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
+ { uint128_t x61 = (x59 + x45);
+ { uint128_t x62 = (x61 >> 0x33);
+ { uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
+ { uint128_t x64 = (x62 + x44);
+ { uint128_t x65 = (x64 >> 0x33);
+ { uint64_t x66 = ((uint64_t)x64 & 0x7ffffffffffff);
+ { uint128_t x67 = (x65 + x43);
+ { uint64_t x68 = (uint64_t) (x67 >> 0x33); /* From here on the carries are narrowed to 64 bits. */
+ { uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
+ { uint128_t x70 = (x68 + x42);
+ { uint64_t x71 = (uint64_t) (x70 >> 0x33);
+ { uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
+ { uint128_t x73 = (x71 + x41);
+ { uint64_t x74 = (uint64_t) (x73 >> 0x33);
+ { uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
+ { uint128_t x76 = (x74 + x40);
+ { uint64_t x77 = (uint64_t) (x76 >> 0x33);
+ { uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
+ { uint128_t x79 = (x51 + ((uint128_t)0xbb * x77)); /* The carry out of the top column (x77) re-enters column 0 multiplied by 0xbb. */
+ { uint64_t x80 = (uint64_t) (x79 >> 0x34);
+ { uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
+ { uint64_t x82 = (x80 + x54); /* Second pass: the carry from limb 0 goes into limb 1. */
+ { uint64_t x83 = (x82 >> 0x33);
+ { uint64_t x84 = (x82 & 0x7ffffffffffff);
+ out[0] = x81; /* out[k] is taken from the column with i + j == k. */
+ out[1] = x84;
+ out[2] = (x83 + x57); /* The carry x83 is added with no further masking. */
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e511m187/fesquare.c b/src/Specific/solinas64_2e511m187/fesquare.c
index f76c2ef67..03d4677cd 100644
--- a/src/Specific/solinas64_2e511m187/fesquare.c
+++ b/src/Specific/solinas64_2e511m187/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0xbb * ((uint128_t)x17 * x17)));
-{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0xbb * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
-{ uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0xbb * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
-{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0xbb * (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0xbb * (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12)))))));
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xbb * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
-{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xbb * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
-{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xbb * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
-{ uint128_t x28 = (((uint128_t)x2 * x2) + (0xbb * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
-{ uint128_t x29 = (x28 >> 0x34);
-{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
-{ uint128_t x31 = (x29 + x27);
-{ uint128_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
-{ uint128_t x34 = (x32 + x26);
-{ uint128_t x35 = (x34 >> 0x33);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
-{ uint128_t x37 = (x35 + x25);
-{ uint128_t x38 = (x37 >> 0x33);
-{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
-{ uint128_t x40 = (x38 + x24);
-{ uint128_t x41 = (x40 >> 0x33);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
-{ uint128_t x43 = (x41 + x23);
-{ uint128_t x44 = (x43 >> 0x33);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffff);
-{ uint128_t x46 = (x44 + x22);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x33);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
-{ uint128_t x49 = (x47 + x21);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x33);
-{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
-{ uint128_t x52 = (x50 + x20);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x33);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
-{ uint128_t x55 = (x53 + x19);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x33);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
-{ uint128_t x58 = (x30 + ((uint128_t)0xbb * x56));
-{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
-{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
-{ uint64_t x61 = (x59 + x33);
-{ uint64_t x62 = (x61 >> 0x33);
-{ uint64_t x63 = (x61 & 0x7ffffffffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint64_t out[10], const uint64_t in1[10]) { /* Squares the 10-limb value in1 and writes 10 result limbs to out. Limb 0 is masked to 52 bits and limbs 1-9 to 51 bits (52 + 9*51 = 511); terms that wrap past the top limb are scaled by 0xbb (= 187), consistent with the 2^511 - 187 named in this file's path. NOTE(review): uint128_t is not declared inside this block and must come from the including context. */
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0]; /* x19..x28 below are the 128-bit product sums feeding limbs 9 down to 0; the 0x2 factors follow from limbs 1-9 carrying one extra bit of weight because limb 0 is 52 bits wide */
+ { uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0xbb * ((uint128_t)x17 * x17)));
+ { uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0xbb * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+ { uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0xbb * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+ { uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0xbb * (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0xbb * (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12)))))));
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xbb * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+ { uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xbb * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+ { uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xbb * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+ { uint128_t x28 = (((uint128_t)x2 * x2) + (0xbb * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
+ { uint128_t x29 = (x28 >> 0x34); /* carry out of limb 0, which is 0x34 = 52 bits wide */
+ { uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff); /* low 52 bits kept for limb 0 */
+ { uint128_t x31 = (x29 + x27);
+ { uint128_t x32 = (x31 >> 0x33); /* limbs 1-9 carry at 0x33 = 51 bits */
+ { uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+ { uint128_t x34 = (x32 + x26);
+ { uint128_t x35 = (x34 >> 0x33);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+ { uint128_t x37 = (x35 + x25);
+ { uint128_t x38 = (x37 >> 0x33);
+ { uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
+ { uint128_t x40 = (x38 + x24);
+ { uint128_t x41 = (x40 >> 0x33);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
+ { uint128_t x43 = (x41 + x23);
+ { uint128_t x44 = (x43 >> 0x33);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffff);
+ { uint128_t x46 = (x44 + x22);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x33);
+ { uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
+ { uint128_t x49 = (x47 + x21);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x33);
+ { uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
+ { uint128_t x52 = (x50 + x20);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x33);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+ { uint128_t x55 = (x53 + x19);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x33); /* carry out of the top limb (limb 9) */
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+ { uint128_t x58 = (x30 + ((uint128_t)0xbb * x56)); /* fold the top carry back into limb 0, scaled by 0xbb */
+ { uint64_t x59 = (uint64_t) (x58 >> 0x34);
+ { uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+ { uint64_t x61 = (x59 + x33); /* carry from the folded limb 0 into limb 1 */
+ { uint64_t x62 = (x61 >> 0x33);
+ { uint64_t x63 = (x61 & 0x7ffffffffffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36); /* the last carry is added here without re-masking */
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e511m187/freeze.c b/src/Specific/solinas64_2e511m187/freeze.c
index 20e1a8cee..b6e52d9b9 100644
--- a/src/Specific/solinas64_2e511m187/freeze.c
+++ b/src/Specific/solinas64_2e511m187/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffff45;;
+static void freeze(uint64_t out[10], const uint64_t in1[10]) { /* Subtracts the constant limbs 0xfffffffffff45 (limb 0) and 0x7ffffffffffff (limbs 1-9) from in1 through a borrow chain, then adds the same constants back masked by x49, which is selected from the final borrow; the ten sums are written to out. In the 52/51-bit limb layout those constants add up to 2^511 - 187. NOTE(review): the x20/x21 and x52/x53 lines are unrendered generator terms, not valid C, and _subborrow_u51, _addcarryx_u51 and cmovznz are not defined in this block. */
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffff45); /* NOTE(review): not C; presumably a 52-bit subtract-with-borrow of 0xfffffffffff45 from x2 with borrow-in 0x0, giving difference x20 and borrow x21 - confirm against the generator */
+ { uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23); /* borrow chain continues through limbs 1-9 */
+ { uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u51(x33, x12, 0x7ffffffffffff, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
+ { uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
+ { uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
+ { uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7ffffffffffff, &x47);
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL); /* mask chosen between 0x0 and all-ones from the final borrow x48; presumably all-ones when x48 is nonzero - TODO confirm cmovznz argument order */
+ { uint64_t x50 = (x49 & 0xfffffffffff45);
+ { uint64_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x20, Return x50); /* NOTE(review): not C; presumably a 52-bit add-with-carry of x20 and x50 with carry-in 0x0, giving sum x52 and carry x53 - confirm against the generator */
+ { uint64_t x54 = (x49 & 0x7ffffffffffff); /* the same masked constant is recomputed for each of limbs 1-9 */
+ { uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
+ { uint64_t x58 = (x49 & 0x7ffffffffffff);
+ { uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
+ { uint64_t x62 = (x49 & 0x7ffffffffffff);
+ { uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
+ { uint64_t x66 = (x49 & 0x7ffffffffffff);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
+ { uint64_t x70 = (x49 & 0x7ffffffffffff);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u51(x69, x35, x70, &x72);
+ { uint64_t x74 = (x49 & 0x7ffffffffffff);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
+ { uint64_t x78 = (x49 & 0x7ffffffffffff);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
+ { uint64_t x82 = (x49 & 0x7ffffffffffff);
+ { uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
+ { uint64_t x86 = (x49 & 0x7ffffffffffff);
+ { uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88); /* the final carry is bound to _ and never read */
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e511m481/femul.c b/src/Specific/solinas64_2e511m481/femul.c
index 7cc6675b3..2f40750f5 100644
--- a/src/Specific/solinas64_2e511m481/femul.c
+++ b/src/Specific/solinas64_2e511m481/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x1e1 * ((uint128_t)x20 * x38)));
-{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x1e1 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
-{ uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (0x1e1 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
-{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x1e1 * (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35))))));
-{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x1e1 * (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33)))))));
-{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x1e1 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
-{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x1e1 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
-{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x1e1 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
-{ uint128_t x49 = (((uint128_t)x5 * x23) + (0x1e1 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
-{ uint128_t x50 = (x49 >> 0x34);
-{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
-{ uint128_t x52 = (x50 + x48);
-{ uint128_t x53 = (x52 >> 0x33);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
-{ uint128_t x55 = (x53 + x47);
-{ uint128_t x56 = (x55 >> 0x33);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
-{ uint128_t x58 = (x56 + x46);
-{ uint128_t x59 = (x58 >> 0x33);
-{ uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
-{ uint128_t x61 = (x59 + x45);
-{ uint128_t x62 = (x61 >> 0x33);
-{ uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
-{ uint128_t x64 = (x62 + x44);
-{ uint128_t x65 = (x64 >> 0x33);
-{ uint64_t x66 = ((uint64_t)x64 & 0x7ffffffffffff);
-{ uint128_t x67 = (x65 + x43);
-{ uint128_t x68 = (x67 >> 0x33);
-{ uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
-{ uint128_t x70 = (x68 + x42);
-{ uint128_t x71 = (x70 >> 0x33);
-{ uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
-{ uint128_t x73 = (x71 + x41);
-{ uint64_t x74 = (uint64_t) (x73 >> 0x33);
-{ uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
-{ uint128_t x76 = (x74 + x40);
-{ uint64_t x77 = (uint64_t) (x76 >> 0x33);
-{ uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
-{ uint128_t x79 = (x51 + ((uint128_t)0x1e1 * x77));
-{ uint64_t x80 = (uint64_t) (x79 >> 0x34);
-{ uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
-{ uint64_t x82 = (x80 + x54);
-{ uint64_t x83 = (x82 >> 0x33);
-{ uint64_t x84 = (x82 & 0x7ffffffffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint64_t out[10], const uint64_t in1[10], const uint64_t in2[10]) { /* Multiplies the 10-limb values in1 and in2 and writes 10 result limbs to out. Limb 0 is masked to 52 bits and limbs 1-9 to 51 bits (52 + 9*51 = 511); terms that wrap past the top limb are scaled by 0x1e1 (= 481), consistent with the 2^511 - 481 named in this file's path. NOTE(review): uint128_t is not declared inside this block and must come from the including context. */
+ { const uint64_t x20 = in1[9];
+ { const uint64_t x21 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x38 = in2[9];
+ { const uint64_t x39 = in2[8];
+ { const uint64_t x37 = in2[7];
+ { const uint64_t x35 = in2[6];
+ { const uint64_t x33 = in2[5];
+ { const uint64_t x31 = in2[4];
+ { const uint64_t x29 = in2[3];
+ { const uint64_t x27 = in2[2];
+ { const uint64_t x25 = in2[1];
+ { const uint64_t x23 = in2[0]; /* x40..x49 below are the 128-bit product sums feeding limbs 9 down to 0; the 0x2 factors follow from limbs 1-9 carrying one extra bit of weight because limb 0 is 52 bits wide */
+ { uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x1e1 * ((uint128_t)x20 * x38)));
+ { uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x1e1 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+ { uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (0x1e1 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+ { uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x1e1 * (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35))))));
+ { uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x1e1 * (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33)))))));
+ { uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x1e1 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+ { uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x1e1 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+ { uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x1e1 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+ { uint128_t x49 = (((uint128_t)x5 * x23) + (0x1e1 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
+ { uint128_t x50 = (x49 >> 0x34); /* carry out of limb 0, which is 0x34 = 52 bits wide */
+ { uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff); /* low 52 bits kept for limb 0 */
+ { uint128_t x52 = (x50 + x48);
+ { uint128_t x53 = (x52 >> 0x33); /* limbs 1-9 carry at 0x33 = 51 bits */
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+ { uint128_t x55 = (x53 + x47);
+ { uint128_t x56 = (x55 >> 0x33);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+ { uint128_t x58 = (x56 + x46);
+ { uint128_t x59 = (x58 >> 0x33);
+ { uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
+ { uint128_t x61 = (x59 + x45);
+ { uint128_t x62 = (x61 >> 0x33);
+ { uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
+ { uint128_t x64 = (x62 + x44);
+ { uint128_t x65 = (x64 >> 0x33);
+ { uint64_t x66 = ((uint64_t)x64 & 0x7ffffffffffff);
+ { uint128_t x67 = (x65 + x43);
+ { uint128_t x68 = (x67 >> 0x33);
+ { uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
+ { uint128_t x70 = (x68 + x42);
+ { uint128_t x71 = (x70 >> 0x33);
+ { uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
+ { uint128_t x73 = (x71 + x41);
+ { uint64_t x74 = (uint64_t) (x73 >> 0x33);
+ { uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
+ { uint128_t x76 = (x74 + x40);
+ { uint64_t x77 = (uint64_t) (x76 >> 0x33); /* carry out of the top limb (limb 9) */
+ { uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
+ { uint128_t x79 = (x51 + ((uint128_t)0x1e1 * x77)); /* fold the top carry back into limb 0, scaled by 0x1e1 */
+ { uint64_t x80 = (uint64_t) (x79 >> 0x34);
+ { uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
+ { uint64_t x82 = (x80 + x54); /* carry from the folded limb 0 into limb 1 */
+ { uint64_t x83 = (x82 >> 0x33);
+ { uint64_t x84 = (x82 & 0x7ffffffffffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57); /* the last carry is added here without re-masking */
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e511m481/fesquare.c b/src/Specific/solinas64_2e511m481/fesquare.c
index 119be6910..973d0ba84 100644
--- a/src/Specific/solinas64_2e511m481/fesquare.c
+++ b/src/Specific/solinas64_2e511m481/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x1e1 * ((uint128_t)x17 * x17)));
-{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x1e1 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
-{ uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x1e1 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
-{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x1e1 * (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x1e1 * (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12)))))));
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x1e1 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
-{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1e1 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
-{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1e1 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
-{ uint128_t x28 = (((uint128_t)x2 * x2) + (0x1e1 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
-{ uint128_t x29 = (x28 >> 0x34);
-{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
-{ uint128_t x31 = (x29 + x27);
-{ uint128_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
-{ uint128_t x34 = (x32 + x26);
-{ uint128_t x35 = (x34 >> 0x33);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
-{ uint128_t x37 = (x35 + x25);
-{ uint128_t x38 = (x37 >> 0x33);
-{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
-{ uint128_t x40 = (x38 + x24);
-{ uint128_t x41 = (x40 >> 0x33);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
-{ uint128_t x43 = (x41 + x23);
-{ uint128_t x44 = (x43 >> 0x33);
-{ uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffff);
-{ uint128_t x46 = (x44 + x22);
-{ uint128_t x47 = (x46 >> 0x33);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
-{ uint128_t x49 = (x47 + x21);
-{ uint128_t x50 = (x49 >> 0x33);
-{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
-{ uint128_t x52 = (x50 + x20);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x33);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
-{ uint128_t x55 = (x53 + x19);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x33);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
-{ uint128_t x58 = (x30 + ((uint128_t)0x1e1 * x56));
-{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
-{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
-{ uint64_t x61 = (x59 + x33);
-{ uint64_t x62 = (x61 >> 0x33);
-{ uint64_t x63 = (x61 & 0x7ffffffffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x1e1 * ((uint128_t)x17 * x17)));
+ { uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x1e1 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+ { uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x1e1 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+ { uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x1e1 * (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x1e1 * (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12)))))));
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x1e1 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+ { uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1e1 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+ { uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1e1 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+ { uint128_t x28 = (((uint128_t)x2 * x2) + (0x1e1 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
+ { uint128_t x29 = (x28 >> 0x34);
+ { uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+ { uint128_t x31 = (x29 + x27);
+ { uint128_t x32 = (x31 >> 0x33);
+ { uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+ { uint128_t x34 = (x32 + x26);
+ { uint128_t x35 = (x34 >> 0x33);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+ { uint128_t x37 = (x35 + x25);
+ { uint128_t x38 = (x37 >> 0x33);
+ { uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
+ { uint128_t x40 = (x38 + x24);
+ { uint128_t x41 = (x40 >> 0x33);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
+ { uint128_t x43 = (x41 + x23);
+ { uint128_t x44 = (x43 >> 0x33);
+ { uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffff);
+ { uint128_t x46 = (x44 + x22);
+ { uint128_t x47 = (x46 >> 0x33);
+ { uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
+ { uint128_t x49 = (x47 + x21);
+ { uint128_t x50 = (x49 >> 0x33);
+ { uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
+ { uint128_t x52 = (x50 + x20);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x33);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+ { uint128_t x55 = (x53 + x19);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x33);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+ { uint128_t x58 = (x30 + ((uint128_t)0x1e1 * x56));
+ { uint64_t x59 = (uint64_t) (x58 >> 0x34);
+ { uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+ { uint64_t x61 = (x59 + x33);
+ { uint64_t x62 = (x61 >> 0x33);
+ { uint64_t x63 = (x61 & 0x7ffffffffffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e511m481/freeze.c b/src/Specific/solinas64_2e511m481/freeze.c
index 0972f7c11..ff8df981d 100644
--- a/src/Specific/solinas64_2e511m481/freeze.c
+++ b/src/Specific/solinas64_2e511m481/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffe1f;;
+static void freeze(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffe1f);
+ { uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
+ { uint64_t x35; uint8_t x36 = _subborrow_u51(x33, x12, 0x7ffffffffffff, &x35);
+ { uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
+ { uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
+ { uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
+ { uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7ffffffffffff, &x47);
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL);
+ { uint64_t x50 = (x49 & 0xffffffffffe1f);
+ { uint64_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint64_t x54 = (x49 & 0x7ffffffffffff);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
+ { uint64_t x58 = (x49 & 0x7ffffffffffff);
+ { uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
+ { uint64_t x62 = (x49 & 0x7ffffffffffff);
+ { uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
+ { uint64_t x66 = (x49 & 0x7ffffffffffff);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
+ { uint64_t x70 = (x49 & 0x7ffffffffffff);
+ { uint64_t x72; uint8_t x73 = _addcarryx_u51(x69, x35, x70, &x72);
+ { uint64_t x74 = (x49 & 0x7ffffffffffff);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
+ { uint64_t x78 = (x49 & 0x7ffffffffffff);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
+ { uint64_t x82 = (x49 & 0x7ffffffffffff);
+ { uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
+ { uint64_t x86 = (x49 & 0x7ffffffffffff);
+ { uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e512m491x2e496m1/freeze.c b/src/Specific/solinas64_2e512m491x2e496m1/freeze.c
index 9f6318ae5..4d6d97806 100644
--- a/src/Specific/solinas64_2e512m491x2e496m1/freeze.c
+++ b/src/Specific/solinas64_2e512m491x2e496m1/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xfffffffffffff;;
+static void freeze(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xfffffffffffff);
+ { uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x12, 0xfffffffffffff);
+ { uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
+ { uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
+ { uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
+ { uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7f0a7ffffffff, &x47);
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL);
+ { uint64_t x50 = (x49 & 0xfffffffffffff);
+ { uint64_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint64_t x54 = (x49 & 0x7ffffffffffff);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
+ { uint64_t x58 = (x49 & 0x7ffffffffffff);
+ { uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
+ { uint64_t x62 = (x49 & 0x7ffffffffffff);
+ { uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
+ { uint64_t x66 = (x49 & 0x7ffffffffffff);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
+ { uint64_t x70 = (x49 & 0xfffffffffffff);
+ { uint64_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint64_t x74 = (x49 & 0x7ffffffffffff);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
+ { uint64_t x78 = (x49 & 0x7ffffffffffff);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
+ { uint64_t x82 = (x49 & 0x7ffffffffffff);
+ { uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
+ { uint64_t x86 = (x49 & 0x7f0a7ffffffff);
+ { uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e512m569/femul.c b/src/Specific/solinas64_2e512m569/femul.c
index e4b8904f7..46e0f261b 100644
--- a/src/Specific/solinas64_2e512m569/femul.c
+++ b/src/Specific/solinas64_2e512m569/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x239 * ((uint128_t)x20 * x38)));
-{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x239 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
-{ uint128_t x43 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x17 * x23))))))) + (0x239 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
-{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x239 * ((0x2 * ((uint128_t)x17 * x38)) + ((0x2 * ((uint128_t)x19 * x39)) + ((0x2 * ((uint128_t)x21 * x37)) + (0x2 * ((uint128_t)x20 * x35)))))));
-{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x239 * (((uint128_t)x15 * x38) + ((0x2 * ((uint128_t)x17 * x39)) + ((0x2 * ((uint128_t)x19 * x37)) + ((0x2 * ((uint128_t)x21 * x35)) + ((uint128_t)x20 * x33)))))));
-{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x239 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + ((0x2 * ((uint128_t)x17 * x37)) + ((0x2 * ((uint128_t)x19 * x35)) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
-{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x239 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + ((0x2 * ((uint128_t)x17 * x35)) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
-{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x239 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
-{ uint128_t x49 = (((uint128_t)x5 * x23) + (0x239 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
-{ uint128_t x50 = (x49 >> 0x34);
-{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
-{ uint128_t x52 = (x50 + x48);
-{ uint128_t x53 = (x52 >> 0x33);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
-{ uint128_t x55 = (x53 + x47);
-{ uint128_t x56 = (x55 >> 0x33);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
-{ uint128_t x58 = (x56 + x46);
-{ uint128_t x59 = (x58 >> 0x33);
-{ uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
-{ uint128_t x61 = (x59 + x45);
-{ uint128_t x62 = (x61 >> 0x33);
-{ uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
-{ uint128_t x64 = (x62 + x44);
-{ uint128_t x65 = (x64 >> 0x34);
-{ uint64_t x66 = ((uint64_t)x64 & 0xfffffffffffff);
-{ uint128_t x67 = (x65 + x43);
-{ uint128_t x68 = (x67 >> 0x33);
-{ uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
-{ uint128_t x70 = (x68 + x42);
-{ uint128_t x71 = (x70 >> 0x33);
-{ uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
-{ uint128_t x73 = (x71 + x41);
-{ uint64_t x74 = (uint64_t) (x73 >> 0x33);
-{ uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
-{ uint128_t x76 = (x74 + x40);
-{ uint64_t x77 = (uint64_t) (x76 >> 0x33);
-{ uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
-{ uint128_t x79 = (x51 + ((uint128_t)0x239 * x77));
-{ uint64_t x80 = (uint64_t) (x79 >> 0x34);
-{ uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
-{ uint64_t x82 = (x80 + x54);
-{ uint64_t x83 = (x82 >> 0x33);
-{ uint64_t x84 = (x82 & 0x7ffffffffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint64_t out[10], const uint64_t in1[10], const uint64_t in2[10]) {
+ { const uint64_t x20 = in1[9];
+ { const uint64_t x21 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x38 = in2[9];
+ { const uint64_t x39 = in2[8];
+ { const uint64_t x37 = in2[7];
+ { const uint64_t x35 = in2[6];
+ { const uint64_t x33 = in2[5];
+ { const uint64_t x31 = in2[4];
+ { const uint64_t x29 = in2[3];
+ { const uint64_t x27 = in2[2];
+ { const uint64_t x25 = in2[1];
+ { const uint64_t x23 = in2[0];
+ { uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x239 * ((uint128_t)x20 * x38)));
+ { uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x239 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+ { uint128_t x43 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x17 * x23))))))) + (0x239 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+ { uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x239 * ((0x2 * ((uint128_t)x17 * x38)) + ((0x2 * ((uint128_t)x19 * x39)) + ((0x2 * ((uint128_t)x21 * x37)) + (0x2 * ((uint128_t)x20 * x35)))))));
+ { uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x239 * (((uint128_t)x15 * x38) + ((0x2 * ((uint128_t)x17 * x39)) + ((0x2 * ((uint128_t)x19 * x37)) + ((0x2 * ((uint128_t)x21 * x35)) + ((uint128_t)x20 * x33)))))));
+ { uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x239 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + ((0x2 * ((uint128_t)x17 * x37)) + ((0x2 * ((uint128_t)x19 * x35)) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+ { uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x239 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + ((0x2 * ((uint128_t)x17 * x35)) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+ { uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x239 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+ { uint128_t x49 = (((uint128_t)x5 * x23) + (0x239 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
+ { uint128_t x50 = (x49 >> 0x34);
+ { uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
+ { uint128_t x52 = (x50 + x48);
+ { uint128_t x53 = (x52 >> 0x33);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+ { uint128_t x55 = (x53 + x47);
+ { uint128_t x56 = (x55 >> 0x33);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+ { uint128_t x58 = (x56 + x46);
+ { uint128_t x59 = (x58 >> 0x33);
+ { uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
+ { uint128_t x61 = (x59 + x45);
+ { uint128_t x62 = (x61 >> 0x33);
+ { uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
+ { uint128_t x64 = (x62 + x44);
+ { uint128_t x65 = (x64 >> 0x34);
+ { uint64_t x66 = ((uint64_t)x64 & 0xfffffffffffff);
+ { uint128_t x67 = (x65 + x43);
+ { uint128_t x68 = (x67 >> 0x33);
+ { uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
+ { uint128_t x70 = (x68 + x42);
+ { uint128_t x71 = (x70 >> 0x33);
+ { uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
+ { uint128_t x73 = (x71 + x41);
+ { uint64_t x74 = (uint64_t) (x73 >> 0x33);
+ { uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
+ { uint128_t x76 = (x74 + x40);
+ { uint64_t x77 = (uint64_t) (x76 >> 0x33);
+ { uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
+ { uint128_t x79 = (x51 + ((uint128_t)0x239 * x77));
+ { uint64_t x80 = (uint64_t) (x79 >> 0x34);
+ { uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
+ { uint64_t x82 = (x80 + x54);
+ { uint64_t x83 = (x82 >> 0x33);
+ { uint64_t x84 = (x82 & 0x7ffffffffffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e512m569/fesquare.c b/src/Specific/solinas64_2e512m569/fesquare.c
index 61f6a5c8c..c8ecbdfda 100644
--- a/src/Specific/solinas64_2e512m569/fesquare.c
+++ b/src/Specific/solinas64_2e512m569/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x239 * ((uint128_t)x17 * x17)));
-{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x239 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
-{ uint128_t x22 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x239 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
-{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x239 * ((0x2 * ((uint128_t)x14 * x17)) + ((0x2 * ((uint128_t)x16 * x18)) + ((0x2 * ((uint128_t)x18 * x16)) + (0x2 * ((uint128_t)x17 * x14)))))));
-{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x239 * (((uint128_t)x12 * x17) + ((0x2 * ((uint128_t)x14 * x18)) + ((0x2 * ((uint128_t)x16 * x16)) + ((0x2 * ((uint128_t)x18 * x14)) + ((uint128_t)x17 * x12)))))));
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x239 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + ((0x2 * ((uint128_t)x14 * x16)) + ((0x2 * ((uint128_t)x16 * x14)) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
-{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x239 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + ((0x2 * ((uint128_t)x14 * x14)) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
-{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x239 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
-{ uint128_t x28 = (((uint128_t)x2 * x2) + (0x239 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + (((uint128_t)x12 * x12) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
-{ uint128_t x29 = (x28 >> 0x34);
-{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
-{ uint128_t x31 = (x29 + x27);
-{ uint128_t x32 = (x31 >> 0x33);
-{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
-{ uint128_t x34 = (x32 + x26);
-{ uint128_t x35 = (x34 >> 0x33);
-{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
-{ uint128_t x37 = (x35 + x25);
-{ uint128_t x38 = (x37 >> 0x33);
-{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
-{ uint128_t x40 = (x38 + x24);
-{ uint128_t x41 = (x40 >> 0x33);
-{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
-{ uint128_t x43 = (x41 + x23);
-{ uint128_t x44 = (x43 >> 0x34);
-{ uint64_t x45 = ((uint64_t)x43 & 0xfffffffffffff);
-{ uint128_t x46 = (x44 + x22);
-{ uint128_t x47 = (x46 >> 0x33);
-{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
-{ uint128_t x49 = (x47 + x21);
-{ uint128_t x50 = (x49 >> 0x33);
-{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
-{ uint128_t x52 = (x50 + x20);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x33);
-{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
-{ uint128_t x55 = (x53 + x19);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x33);
-{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
-{ uint128_t x58 = (x30 + ((uint128_t)0x239 * x56));
-{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
-{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
-{ uint64_t x61 = (x59 + x33);
-{ uint64_t x62 = (x61 >> 0x33);
-{ uint64_t x63 = (x61 & 0x7ffffffffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
+ { uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x239 * ((uint128_t)x17 * x17)));
+ { uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x239 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+ { uint128_t x22 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x239 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+ { uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x239 * ((0x2 * ((uint128_t)x14 * x17)) + ((0x2 * ((uint128_t)x16 * x18)) + ((0x2 * ((uint128_t)x18 * x16)) + (0x2 * ((uint128_t)x17 * x14)))))));
+ { uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x239 * (((uint128_t)x12 * x17) + ((0x2 * ((uint128_t)x14 * x18)) + ((0x2 * ((uint128_t)x16 * x16)) + ((0x2 * ((uint128_t)x18 * x14)) + ((uint128_t)x17 * x12)))))));
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x239 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + ((0x2 * ((uint128_t)x14 * x16)) + ((0x2 * ((uint128_t)x16 * x14)) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+ { uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x239 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + ((0x2 * ((uint128_t)x14 * x14)) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+ { uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x239 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+ { uint128_t x28 = (((uint128_t)x2 * x2) + (0x239 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + (((uint128_t)x12 * x12) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
+ { uint128_t x29 = (x28 >> 0x34);
+ { uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+ { uint128_t x31 = (x29 + x27);
+ { uint128_t x32 = (x31 >> 0x33);
+ { uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+ { uint128_t x34 = (x32 + x26);
+ { uint128_t x35 = (x34 >> 0x33);
+ { uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+ { uint128_t x37 = (x35 + x25);
+ { uint128_t x38 = (x37 >> 0x33);
+ { uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
+ { uint128_t x40 = (x38 + x24);
+ { uint128_t x41 = (x40 >> 0x33);
+ { uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
+ { uint128_t x43 = (x41 + x23);
+ { uint128_t x44 = (x43 >> 0x34);
+ { uint64_t x45 = ((uint64_t)x43 & 0xfffffffffffff);
+ { uint128_t x46 = (x44 + x22);
+ { uint128_t x47 = (x46 >> 0x33);
+ { uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
+ { uint128_t x49 = (x47 + x21);
+ { uint128_t x50 = (x49 >> 0x33);
+ { uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
+ { uint128_t x52 = (x50 + x20);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x33);
+ { uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+ { uint128_t x55 = (x53 + x19);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x33);
+ { uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+ { uint128_t x58 = (x30 + ((uint128_t)0x239 * x56));
+ { uint64_t x59 = (uint64_t) (x58 >> 0x34);
+ { uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+ { uint64_t x61 = (x59 + x33);
+ { uint64_t x62 = (x61 >> 0x33);
+ { uint64_t x63 = (x61 & 0x7ffffffffffff);
+ out[0] = x60;
+ out[1] = x63;
+ out[2] = (x62 + x36);
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e512m569/freeze.c b/src/Specific/solinas64_2e512m569/freeze.c
index ab6b2036f..af890dae2 100644
--- a/src/Specific/solinas64_2e512m569/freeze.c
+++ b/src/Specific/solinas64_2e512m569/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0xffffffffffdc7;;
+static void freeze(uint64_t out[10], const uint64_t in1[10]) {
+ { const uint64_t x17 = in1[9];
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0xffffffffffdc7);
+ { uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23);
+ { uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
+ { uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
+ { uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x12, 0xfffffffffffff);
+ { uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
+ { uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
+ { uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
+ { uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7ffffffffffff, &x47);
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL);
+ { uint64_t x50 = (x49 & 0xffffffffffdc7);
+ { uint64_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x20, Return x50);
+ { uint64_t x54 = (x49 & 0x7ffffffffffff);
+ { uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
+ { uint64_t x58 = (x49 & 0x7ffffffffffff);
+ { uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
+ { uint64_t x62 = (x49 & 0x7ffffffffffff);
+ { uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
+ { uint64_t x66 = (x49 & 0x7ffffffffffff);
+ { uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
+ { uint64_t x70 = (x49 & 0xfffffffffffff);
+ { uint64_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint64_t x74 = (x49 & 0x7ffffffffffff);
+ { uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
+ { uint64_t x78 = (x49 & 0x7ffffffffffff);
+ { uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
+ { uint64_t x82 = (x49 & 0x7ffffffffffff);
+ { uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
+ { uint64_t x86 = (x49 & 0x7ffffffffffff);
+ { uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88);
+ out[0] = x52;
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
-// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e521m1/femul.c b/src/Specific/solinas64_2e521m1/femul.c
index 085ed88ea..08cf14225 100644
--- a/src/Specific/solinas64_2e521m1/femul.c
+++ b/src/Specific/solinas64_2e521m1/femul.c
@@ -1,76 +1,78 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "femul.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
-{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
-{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + ((uint128_t)x20 * x38));
-{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39)));
-{ uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37))));
-{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35)))));
-{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33))))));
-{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31)))))));
-{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29))))))));
-{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27)))))))));
-{ uint128_t x49 = (((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25)))))))))));
-{ uint64_t x50 = (uint64_t) (x49 >> 0x35);
-{ uint64_t x51 = ((uint64_t)x49 & 0x1fffffffffffff);
-{ uint128_t x52 = (x50 + x48);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x34);
-{ uint64_t x54 = ((uint64_t)x52 & 0xfffffffffffff);
-{ uint128_t x55 = (x53 + x47);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x34);
-{ uint64_t x57 = ((uint64_t)x55 & 0xfffffffffffff);
-{ uint128_t x58 = (x56 + x46);
-{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
-{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
-{ uint128_t x61 = (x59 + x45);
-{ uint64_t x62 = (uint64_t) (x61 >> 0x34);
-{ uint64_t x63 = ((uint64_t)x61 & 0xfffffffffffff);
-{ uint128_t x64 = (x62 + x44);
-{ uint64_t x65 = (uint64_t) (x64 >> 0x34);
-{ uint64_t x66 = ((uint64_t)x64 & 0xfffffffffffff);
-{ uint128_t x67 = (x65 + x43);
-{ uint64_t x68 = (uint64_t) (x67 >> 0x34);
-{ uint64_t x69 = ((uint64_t)x67 & 0xfffffffffffff);
-{ uint128_t x70 = (x68 + x42);
-{ uint64_t x71 = (uint64_t) (x70 >> 0x34);
-{ uint64_t x72 = ((uint64_t)x70 & 0xfffffffffffff);
-{ uint128_t x73 = (x71 + x41);
-{ uint64_t x74 = (uint64_t) (x73 >> 0x34);
-{ uint64_t x75 = ((uint64_t)x73 & 0xfffffffffffff);
-{ uint128_t x76 = (x74 + x40);
-{ uint64_t x77 = (uint64_t) (x76 >> 0x34);
-{ uint64_t x78 = ((uint64_t)x76 & 0xfffffffffffff);
-{ uint64_t x79 = (x51 + x77);
-{ uint64_t x80 = (x79 >> 0x35);
-{ uint64_t x81 = (x79 & 0x1fffffffffffff);
-{ uint64_t x82 = (x80 + x54);
-{ uint64_t x83 = (x82 >> 0x34);
-{ uint64_t x84 = (x82 & 0xfffffffffffff);
-out[0] = x78;
-out[1] = x75;
-out[2] = x72;
-out[3] = x69;
-out[4] = x66;
-out[5] = x63;
-out[6] = x60;
-out[7] = x83 + x57;
-out[8] = x84;
-out[9] = x81;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void femul(uint64_t out[10], const uint64_t in1[10], const uint64_t in2[10]) {
+ { const uint64_t x20 = in1[9];
+ { const uint64_t x21 = in1[8];
+ { const uint64_t x19 = in1[7];
+ { const uint64_t x17 = in1[6];
+ { const uint64_t x15 = in1[5];
+ { const uint64_t x13 = in1[4];
+ { const uint64_t x11 = in1[3];
+ { const uint64_t x9 = in1[2];
+ { const uint64_t x7 = in1[1];
+ { const uint64_t x5 = in1[0];
+ { const uint64_t x38 = in2[9];
+ { const uint64_t x39 = in2[8];
+ { const uint64_t x37 = in2[7];
+ { const uint64_t x35 = in2[6];
+ { const uint64_t x33 = in2[5];
+ { const uint64_t x31 = in2[4];
+ { const uint64_t x29 = in2[3];
+ { const uint64_t x27 = in2[2];
+ { const uint64_t x25 = in2[1];
+ { const uint64_t x23 = in2[0];
+ { uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
+ { uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + ((uint128_t)x20 * x38));
+ { uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39)));
+ { uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37))));
+ { uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35)))));
+ { uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33))))));
+ { uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31)))))));
+ { uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29))))))));
+ { uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27)))))))));
+ { uint128_t x49 = (((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25)))))))))));
+ { uint64_t x50 = (uint64_t) (x49 >> 0x35);
+ { uint64_t x51 = ((uint64_t)x49 & 0x1fffffffffffff);
+ { uint128_t x52 = (x50 + x48);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x34);
+ { uint64_t x54 = ((uint64_t)x52 & 0xfffffffffffff);
+ { uint128_t x55 = (x53 + x47);
+ { uint64_t x56 = (uint64_t) (x55 >> 0x34);
+ { uint64_t x57 = ((uint64_t)x55 & 0xfffffffffffff);
+ { uint128_t x58 = (x56 + x46);
+ { uint64_t x59 = (uint64_t) (x58 >> 0x34);
+ { uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+ { uint128_t x61 = (x59 + x45);
+ { uint64_t x62 = (uint64_t) (x61 >> 0x34);
+ { uint64_t x63 = ((uint64_t)x61 & 0xfffffffffffff);
+ { uint128_t x64 = (x62 + x44);
+ { uint64_t x65 = (uint64_t) (x64 >> 0x34);
+ { uint64_t x66 = ((uint64_t)x64 & 0xfffffffffffff);
+ { uint128_t x67 = (x65 + x43);
+ { uint64_t x68 = (uint64_t) (x67 >> 0x34);
+ { uint64_t x69 = ((uint64_t)x67 & 0xfffffffffffff);
+ { uint128_t x70 = (x68 + x42);
+ { uint64_t x71 = (uint64_t) (x70 >> 0x34);
+ { uint64_t x72 = ((uint64_t)x70 & 0xfffffffffffff);
+ { uint128_t x73 = (x71 + x41);
+ { uint64_t x74 = (uint64_t) (x73 >> 0x34);
+ { uint64_t x75 = ((uint64_t)x73 & 0xfffffffffffff);
+ { uint128_t x76 = (x74 + x40);
+ { uint64_t x77 = (uint64_t) (x76 >> 0x34);
+ { uint64_t x78 = ((uint64_t)x76 & 0xfffffffffffff);
+ { uint64_t x79 = (x51 + x77);
+ { uint64_t x80 = (x79 >> 0x35);
+ { uint64_t x81 = (x79 & 0x1fffffffffffff);
+ { uint64_t x82 = (x80 + x54);
+ { uint64_t x83 = (x82 >> 0x34);
+ { uint64_t x84 = (x82 & 0xfffffffffffff);
+ out[0] = x81;
+ out[1] = x84;
+ out[2] = (x83 + x57);
+ out[3] = x60;
+ out[4] = x63;
+ out[5] = x66;
+ out[6] = x69;
+ out[7] = x72;
+ out[8] = x75;
+ out[9] = x78;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e521m1/fesquare.c b/src/Specific/solinas64_2e521m1/fesquare.c
index 280bf114e..cb979cf16 100644
--- a/src/Specific/solinas64_2e521m1/fesquare.c
+++ b/src/Specific/solinas64_2e521m1/fesquare.c
@@ -1,76 +1,68 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "fesquare.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
-{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + ((uint128_t)x17 * x17));
-{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18)));
-{ uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16))));
-{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14)))));
-{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12))))));
-{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10)))))));
-{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8))))))));
-{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6)))))))));
-{ uint128_t x28 = (((uint128_t)x2 * x2) + ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4)))))))))));
-{ uint64_t x29 = (uint64_t) (x28 >> 0x35);
-{ uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff);
-{ uint128_t x31 = (x29 + x27);
-{ uint64_t x32 = (uint64_t) (x31 >> 0x34);
-{ uint64_t x33 = ((uint64_t)x31 & 0xfffffffffffff);
-{ uint128_t x34 = (x32 + x26);
-{ uint64_t x35 = (uint64_t) (x34 >> 0x34);
-{ uint64_t x36 = ((uint64_t)x34 & 0xfffffffffffff);
-{ uint128_t x37 = (x35 + x25);
-{ uint64_t x38 = (uint64_t) (x37 >> 0x34);
-{ uint64_t x39 = ((uint64_t)x37 & 0xfffffffffffff);
-{ uint128_t x40 = (x38 + x24);
-{ uint64_t x41 = (uint64_t) (x40 >> 0x34);
-{ uint64_t x42 = ((uint64_t)x40 & 0xfffffffffffff);
-{ uint128_t x43 = (x41 + x23);
-{ uint64_t x44 = (uint64_t) (x43 >> 0x34);
-{ uint64_t x45 = ((uint64_t)x43 & 0xfffffffffffff);
-{ uint128_t x46 = (x44 + x22);
-{ uint64_t x47 = (uint64_t) (x46 >> 0x34);
-{ uint64_t x48 = ((uint64_t)x46 & 0xfffffffffffff);
-{ uint128_t x49 = (x47 + x21);
-{ uint64_t x50 = (uint64_t) (x49 >> 0x34);
-{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
-{ uint128_t x52 = (x50 + x20);
-{ uint64_t x53 = (uint64_t) (x52 >> 0x34);
-{ uint64_t x54 = ((uint64_t)x52 & 0xfffffffffffff);
-{ uint128_t x55 = (x53 + x19);
-{ uint64_t x56 = (uint64_t) (x55 >> 0x34);
-{ uint64_t x57 = ((uint64_t)x55 & 0xfffffffffffff);
-{ uint64_t x58 = (x30 + x56);
-{ uint64_t x59 = (x58 >> 0x35);
-{ uint64_t x60 = (x58 & 0x1fffffffffffff);
-{ uint64_t x61 = (x59 + x33);
-{ uint64_t x62 = (x61 >> 0x34);
-{ uint64_t x63 = (x61 & 0xfffffffffffff);
-out[0] = x57;
-out[1] = x54;
-out[2] = x51;
-out[3] = x48;
-out[4] = x45;
-out[5] = x42;
-out[6] = x39;
-out[7] = x62 + x36;
-out[8] = x63;
-out[9] = x60;
-}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
-// caller: uint64_t out[10];
+static void fesquare(uint64_t out[10], const uint64_t in1[10]) { /* Multiplies the ten-word input in1 by itself word by word, runs a carry chain over the ten 128-bit column sums, and stores ten words in out. */
+ { const uint64_t x17 = in1[9]; /* the ten input words are loaded from in1[9] down to in1[0] */
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2)))))))))); /* products whose in1 indices add up to 9. NOTE(review): uint128_t is not declared in the lines added here - confirm where it is provided */
+ { uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + ((uint128_t)x17 * x17)); /* index sums of 8, plus x17 * x17 (9 + 9) */
+ { uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18)));
+ { uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16))));
+ { uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14)))));
+ { uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12))))));
+ { uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10)))))));
+ { uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8))))))));
+ { uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6)))))))));
+ { uint128_t x28 = (((uint128_t)x2 * x2) + ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))); /* x2 * x2 plus the doubled products whose in1 indices add up to 10; the carry chain starts from this sum */
+ { uint64_t x29 = (uint64_t) (x28 >> 0x35); /* carry out of x28: everything from bit 0x35 (53) upward */
+ { uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff); /* low 53 bits of x28 */
+ { uint128_t x31 = (x29 + x27); /* carry folded into the next column sum */
+ { uint64_t x32 = (uint64_t) (x31 >> 0x34); /* from here to x57 the split point is 0x34 (52) bits */
+ { uint64_t x33 = ((uint64_t)x31 & 0xfffffffffffff);
+ { uint128_t x34 = (x32 + x26);
+ { uint64_t x35 = (uint64_t) (x34 >> 0x34);
+ { uint64_t x36 = ((uint64_t)x34 & 0xfffffffffffff);
+ { uint128_t x37 = (x35 + x25);
+ { uint64_t x38 = (uint64_t) (x37 >> 0x34);
+ { uint64_t x39 = ((uint64_t)x37 & 0xfffffffffffff);
+ { uint128_t x40 = (x38 + x24);
+ { uint64_t x41 = (uint64_t) (x40 >> 0x34);
+ { uint64_t x42 = ((uint64_t)x40 & 0xfffffffffffff);
+ { uint128_t x43 = (x41 + x23);
+ { uint64_t x44 = (uint64_t) (x43 >> 0x34);
+ { uint64_t x45 = ((uint64_t)x43 & 0xfffffffffffff);
+ { uint128_t x46 = (x44 + x22);
+ { uint64_t x47 = (uint64_t) (x46 >> 0x34);
+ { uint64_t x48 = ((uint64_t)x46 & 0xfffffffffffff);
+ { uint128_t x49 = (x47 + x21);
+ { uint64_t x50 = (uint64_t) (x49 >> 0x34);
+ { uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
+ { uint128_t x52 = (x50 + x20);
+ { uint64_t x53 = (uint64_t) (x52 >> 0x34);
+ { uint64_t x54 = ((uint64_t)x52 & 0xfffffffffffff);
+ { uint128_t x55 = (x53 + x19); /* last column sum of the chain */
+ { uint64_t x56 = (uint64_t) (x55 >> 0x34); /* carry out of the last column */
+ { uint64_t x57 = ((uint64_t)x55 & 0xfffffffffffff);
+ { uint64_t x58 = (x30 + x56); /* top carry is added back onto the lowest word; presumably the wrap-around for a 2^521 - 1 modulus suggested by the directory name - confirm */
+ { uint64_t x59 = (x58 >> 0x35); /* second, 64-bit-only pass: carry out of the 53-bit low word */
+ { uint64_t x60 = (x58 & 0x1fffffffffffff);
+ { uint64_t x61 = (x59 + x33);
+ { uint64_t x62 = (x61 >> 0x34);
+ { uint64_t x63 = (x61 & 0xfffffffffffff);
+ out[0] = x60; /* out[0] is the 53-bit-masked word, out[1] and out[3]..out[9] are 52-bit-masked */
+ out[1] = x63;
+ out[2] = (x62 + x36); /* no mask is applied to this sum */
+ out[3] = x39;
+ out[4] = x42;
+ out[5] = x45;
+ out[6] = x48;
+ out[7] = x51;
+ out[8] = x54;
+ out[9] = x57;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+}
diff --git a/src/Specific/solinas64_2e521m1/freeze.c b/src/Specific/solinas64_2e521m1/freeze.c
index 24db516df..3f8e4457a 100644
--- a/src/Specific/solinas64_2e521m1/freeze.c
+++ b/src/Specific/solinas64_2e521m1/freeze.c
@@ -1,25 +1,54 @@
-#include <stdint.h>
-#include <stdbool.h>
-#include <x86intrin.h>
-#include "liblow.h"
-
-#include "freeze.h"
-
-typedef unsigned int uint128_t __attribute__((mode(TI)));
-
-#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
-#define _subborrow_u32 __builtin_ia32_sbb_u32
-#define _subborrow_u64 __builtin_ia32_sbb_u64
-#endif
-
-#undef force_inline
-#define force_inline __attribute__((always_inline))
-
-void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
-out[0] = uint64_t x20;
-out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 53 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
-out[2] = x2;
-out[3] = 0x1fffffffffffff;;
+static void freeze(uint64_t out[10], const uint64_t in1[10]) { /* Loads ten words from in1, runs a ten-step SubWithGetBorrow chain against the constants 0x1fffffffffffff / 0xfffffffffffff, then a ten-step AddWithGetCarry chain using masks derived from x49, and stores the ten AddWithGetCarry results in out. */
+ { const uint64_t x17 = in1[9]; /* the ten input words are loaded from in1[9] down to in1[0] */
+ { const uint64_t x18 = in1[8];
+ { const uint64_t x16 = in1[7];
+ { const uint64_t x14 = in1[6];
+ { const uint64_t x12 = in1[5];
+ { const uint64_t x10 = in1[4];
+ { const uint64_t x8 = in1[3];
+ { const uint64_t x6 = in1[2];
+ { const uint64_t x4 = in1[1];
+ { const uint64_t x2 = in1[0];
+ { uint64_t x20, uint8_t x21 = Op (Syntax.SubWithGetBorrow 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, 0x1fffffffffffff); /* NOTE(review): not valid C as written - the two-type declaration and "Op (Syntax....)" look like generator output that was never lowered to C. Presumably x20 = x2 - 0x1fffffffffffff at width 53 with borrow-in 0x0 and borrow-out x21 - confirm */
+ { uint64_t x23, uint8_t x24 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x21, Return x4, 0xfffffffffffff); /* same operation at width 52; each step takes the previous step's uint8_t output as its first argument */
+ { uint64_t x26, uint8_t x27 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x24, Return x6, 0xfffffffffffff);
+ { uint64_t x29, uint8_t x30 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x27, Return x8, 0xfffffffffffff);
+ { uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x30, Return x10, 0xfffffffffffff);
+ { uint64_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x12, 0xfffffffffffff);
+ { uint64_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x14, 0xfffffffffffff);
+ { uint64_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x39, Return x16, 0xfffffffffffff);
+ { uint64_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x18, 0xfffffffffffff);
+ { uint64_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x17, 0xfffffffffffff); /* last step of the subtraction chain; x48 is its uint8_t output */
+ { uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL); /* cmovznz is not defined in these lines; presumably yields 0x0 or all-ones depending on x48 - confirm */
+ { uint64_t x50 = (x49 & 0x1fffffffffffff); /* x49 masked to 53 bits, paired with x20 below */
+ { uint64_t x52, uint8_t x53 = Op (Syntax.AddWithGetCarry 53 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x20, Return x50); /* NOTE(review): same un-lowered form as the SubWithGetBorrow lines; presumably x20 + x50 at width 53 with carry-in 0x0 and carry-out x53 - confirm */
+ { uint64_t x54 = (x49 & 0xfffffffffffff); /* x49 masked to 52 bits; the same mask is recomputed before every remaining step */
+ { uint64_t x56, uint8_t x57 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x53, Return x23, Return x54);
+ { uint64_t x58 = (x49 & 0xfffffffffffff);
+ { uint64_t x60, uint8_t x61 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x57, Return x26, Return x58);
+ { uint64_t x62 = (x49 & 0xfffffffffffff);
+ { uint64_t x64, uint8_t x65 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x61, Return x29, Return x62);
+ { uint64_t x66 = (x49 & 0xfffffffffffff);
+ { uint64_t x68, uint8_t x69 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x65, Return x32, Return x66);
+ { uint64_t x70 = (x49 & 0xfffffffffffff);
+ { uint64_t x72, uint8_t x73 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x69, Return x35, Return x70);
+ { uint64_t x74 = (x49 & 0xfffffffffffff);
+ { uint64_t x76, uint8_t x77 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x73, Return x38, Return x74);
+ { uint64_t x78 = (x49 & 0xfffffffffffff);
+ { uint64_t x80, uint8_t x81 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x77, Return x41, Return x78);
+ { uint64_t x82 = (x49 & 0xfffffffffffff);
+ { uint64_t x84, uint8_t x85 = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x81, Return x44, Return x82);
+ { uint64_t x86 = (x49 & 0xfffffffffffff);
+ { uint64_t x88, uint8_t _ = Op (Syntax.AddWithGetCarry 52 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x85, Return x47, Return x86); /* the final uint8_t output is bound to _ and never read */
+ out[0] = x52; /* out[i] receives the result of the i-th AddWithGetCarry step */
+ out[1] = x56;
+ out[2] = x60;
+ out[3] = x64;
+ out[4] = x68;
+ out[5] = x72;
+ out[6] = x76;
+ out[7] = x80;
+ out[8] = x84;
+ out[9] = x88;
+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 }
-// caller: uint64_t out[4];